diff --git a/.github/code_spell_ignore.txt b/.github/code_spell_ignore.txt index e69de29bb2..4566d4f3a2 100644 --- a/.github/code_spell_ignore.txt +++ b/.github/code_spell_ignore.txt @@ -0,0 +1,2 @@ +ModelIn +modelin diff --git a/.github/license_template.txt b/.github/license_template.txt index b43bb9dc80..a0410374d8 100644 --- a/.github/license_template.txt +++ b/.github/license_template.txt @@ -1,2 +1,2 @@ Copyright (C) 2024 Intel Corporation -SPDX-License-Identifier: Apache-2.0 \ No newline at end of file +SPDX-License-Identifier: Apache-2.0 diff --git a/.github/workflows/_example-workflow.yml b/.github/workflows/_example-workflow.yml index 07e857d61b..9b50d93490 100644 --- a/.github/workflows/_example-workflow.yml +++ b/.github/workflows/_example-workflow.yml @@ -40,6 +40,11 @@ on: default: "main" required: false type: string + inject_commit: + default: false + required: false + type: string + jobs: #################################################################################################### # Image Build @@ -72,6 +77,10 @@ jobs: git clone https://github.com/vllm-project/vllm.git cd vllm && git rev-parse HEAD && cd ../ fi + if [[ $(grep -c "vllm-hpu:" ${docker_compose_path}) != 0 ]]; then + git clone https://github.com/HabanaAI/vllm-fork.git + cd vllm-fork && git rev-parse HEAD && cd ../ + fi git clone https://github.com/opea-project/GenAIComps.git cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../ @@ -83,6 +92,7 @@ jobs: docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml service_list: ${{ inputs.services }} registry: ${OPEA_IMAGE_REPO}opea + inject_commit: ${{ inputs.inject_commit }} tag: ${{ inputs.tag }} #################################################################################################### diff --git a/.github/workflows/_manifest-e2e.yml b/.github/workflows/_manifest-e2e.yml index 69a080506d..fc414490da 100644 --- a/.github/workflows/_manifest-e2e.yml +++ b/.github/workflows/_manifest-e2e.yml @@ -90,10 +90,16 @@ jobs: echo "Validate ${{ inputs.example }} successful!" else echo "Validate ${{ inputs.example }} failure!!!" - .github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE + echo "Check the logs in 'Dump logs when e2e test failed' step!!!" + exit 1 fi fi + - name: Dump logs when e2e test failed + if: failure() + run: | + .github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE + - name: Kubectl uninstall if: always() run: | diff --git a/.github/workflows/_run-docker-compose.yml b/.github/workflows/_run-docker-compose.yml index fe86a60392..60bf70dcb8 100644 --- a/.github/workflows/_run-docker-compose.yml +++ b/.github/workflows/_run-docker-compose.yml @@ -141,7 +141,11 @@ jobs: flag=${flag#test_} yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml") echo $yaml_file - docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true + container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2) + for container_name in $container_list; do + cid=$(docker ps -aq --filter "name=$container_name") + if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + done docker system prune -f docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true diff --git a/.github/workflows/check-online-doc-build.yml b/.github/workflows/check-online-doc-build.yml new file mode 100644 index 0000000000..4972f398dc --- /dev/null +++ b/.github/workflows/check-online-doc-build.yml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Check Online Document Building +permissions: {} + +on: + pull_request: + branches: [main] + paths: + - "**.md" + - "**.rst" + +jobs: + build: + runs-on: ubuntu-latest + steps: + + - name: Checkout + uses: actions/checkout@v4 + with: + path: GenAIExamples + + - name: Checkout docs + uses: actions/checkout@v4 + with: + repository: opea-project/docs + path: docs + + - name: Build Online Document + shell: bash + run: | + echo "build online doc" + cd docs + bash scripts/build.sh diff --git a/.github/workflows/manual-example-workflow.yml b/.github/workflows/manual-example-workflow.yml index 03ba728c79..9e31f26d78 100644 --- a/.github/workflows/manual-example-workflow.yml +++ b/.github/workflows/manual-example-workflow.yml @@ -50,6 +50,11 @@ on: description: 'OPEA branch for image build' required: false type: string + inject_commit: + default: true + description: "inject commit to docker images true or false" + required: false + type: string permissions: read-all jobs: @@ -101,4 +106,5 @@ jobs: test_k8s: ${{ fromJSON(inputs.test_k8s) }} test_gmc: ${{ fromJSON(inputs.test_gmc) }} opea_branch: ${{ inputs.opea_branch }} + inject_commit: ${{ inputs.inject_commit }} secrets: inherit diff --git a/.github/workflows/manual-image-build.yml b/.github/workflows/manual-image-build.yml index 8a0b0cf2c5..53ba750ed6 100644 --- a/.github/workflows/manual-image-build.yml +++ b/.github/workflows/manual-image-build.yml @@ -30,6 +30,12 @@ on: description: 'OPEA branch for image build' required: false type: string + inject_commit: + default: true + description: "inject commit to docker images true or false" + required: false + type: string + jobs: get-test-matrix: runs-on: ubuntu-latest @@ -56,4 +62,5 @@ jobs: services: ${{ inputs.services }} tag: ${{ inputs.tag }} opea_branch: ${{ inputs.opea_branch }} + inject_commit: ${{ inputs.inject_commit }} secrets: inherit diff --git a/.github/workflows/nightly-docker-build-publish.yml b/.github/workflows/nightly-docker-build-publish.yml new file mode 100644 index 0000000000..d30562224f --- /dev/null +++ b/.github/workflows/nightly-docker-build-publish.yml @@ -0,0 +1,70 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Nightly build/publish latest docker images + +on: + schedule: + - cron: "30 13 * * *" # UTC time + workflow_dispatch: + +env: + EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA" + TAG: "latest" + PUBLISH_TAGS: "latest" + +jobs: + get-build-matrix: + runs-on: ubuntu-latest + outputs: + examples_json: ${{ steps.get-matrix.outputs.examples_json }} + EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }} + TAG: ${{ steps.get-matrix.outputs.TAG }} + PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }} + steps: + - name: Create Matrix + id: get-matrix + run: | + examples=($(echo ${EXAMPLES} | tr ',' ' ')) + examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' 
| jq -sc '.') + echo "examples_json=$examples_json" >> $GITHUB_OUTPUT + echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT + echo "TAG=$TAG" >> $GITHUB_OUTPUT + echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT + + build: + needs: get-build-matrix + strategy: + matrix: + example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }} + fail-fast: false + uses: ./.github/workflows/_example-workflow.yml + with: + node: gaudi + example: ${{ matrix.example }} + secrets: inherit + + get-image-list: + needs: get-build-matrix + uses: ./.github/workflows/_get-image-list.yml + with: + examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }} + + publish: + needs: [get-build-matrix, get-image-list, build] + strategy: + matrix: + image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }} + runs-on: "docker-build-gaudi" + steps: + - uses: docker/login-action@v3.2.0 + with: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Image Publish + uses: opea-project/validation/actions/image-publish@main + with: + local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }} + image_name: opea/${{ matrix.image }} + publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }} diff --git a/.github/workflows/pr-gmc-e2e.yaml b/.github/workflows/pr-gmc-e2e.yaml index c2faf3a617..b0be26e993 100644 --- a/.github/workflows/pr-gmc-e2e.yaml +++ b/.github/workflows/pr-gmc-e2e.yaml @@ -12,7 +12,7 @@ on: - "**/tests/test_gmc**" - "!**.md" - "!**.txt" - - "!**/kubernetes/**/manifests/**" + - "!**/kubernetes/**/manifest/**" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index cf640cb147..c314bd614d 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -61,14 +61,14 @@ jobs: changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" if [ -n "$changed_files" ]; then for changed_file in $changed_files; do - echo $changed_file + # echo $changed_file url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true if [ -n "$url_lines" ]; then for url_line in $url_lines; do - echo $url_line + # echo $url_line url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) - response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true if [ "$response" -ne 200 ]; then echo "**********Validation failed, try again**********" response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") diff --git a/.github/workflows/scripts/get_test_matrix.sh b/.github/workflows/scripts/get_test_matrix.sh index ac373f350a..a024617027 100644 --- a/.github/workflows/scripts/get_test_matrix.sh +++ b/.github/workflows/scripts/get_test_matrix.sh @@ -9,12 +9,15 @@ set -e changed_files=$changed_files test_mode=$test_mode run_matrix="{\"include\":[" -hardware_list="xeon gaudi" # current support hardware list examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u) for example in ${examples}; do cd $WORKSPACE/$example if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi + cd tests + ls -l + hardware_list=$(find . -type f -name "test_compose*_on_*.sh" | cut -d/ -f2 | cut -d. 
-f1 | awk -F'_on_' '{print $2}'| sort -u) + echo "Test supported hardware list = ${hardware_list}" run_hardware="" if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then diff --git a/.gitignore b/.gitignore index 3a5650d215..8b736f831e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ **/playwright/.cache/ **/test-results/ -__pycache__/ \ No newline at end of file +__pycache__/ diff --git a/.prettierignore b/.prettierignore index 0978a006b9..4ab09a93b7 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1 +1 @@ -**/kubernetes/ \ No newline at end of file +**/kubernetes/ diff --git a/AgentQnA/README.md b/AgentQnA/README.md index e6cfaf7c9f..9c351a856f 100644 --- a/AgentQnA/README.md +++ b/AgentQnA/README.md @@ -81,17 +81,13 @@ flowchart LR 3. Hierarchical agent can further improve performance. Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer. -### Roadmap +## Deployment with docker -- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend. -- v1.0: Worker agent uses OPEA retrieval megaservice as tool. -- v1.0 or later: agents use open-source llm backend. -- v1.1 or later: add safeguards +1. Build agent docker image -## Getting started + Note: this is optional. The docker images will be automatically pulled when running the docker compose commands. This step is only needed if pulling images failed. -1. Build agent docker image
- First, clone the opea GenAIComps repo + First, clone the OPEA GenAIComps repo. ``` export WORKDIR= @@ -106,35 +102,63 @@ flowchart LR docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile . ``` -2. Launch tool services
- In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs. - - ``` - docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0 - ``` - -3. Set up environment for this example
- First, clone this repo +2. Set up environment for this example
+ First, clone this repo. ``` cd $WORKDIR git clone https://github.com/opea-project/GenAIExamples.git ``` - Second, set up env vars + Second, set up env vars. ``` export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/ - # optional: OPANAI_API_KEY + # for using open-source LLMs + export HUGGINGFACEHUB_API_TOKEN= + export HF_CACHE_DIR= # so models need not be re-downloaded every time + + # optional: OPENAI_API_KEY if you want to use OpenAI models export OPENAI_API_KEY= ``` -4. Launch agent services
- The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release. - To use openai llm, run command below. +3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service) + + First, launch the mega-service. + + ``` + cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool + bash launch_retrieval_tool.sh + ``` + + Then, ingest data into the vector database. Here we provide an example. You can ingest your own data. + + ``` + bash run_ingest_data.sh + ``` + +4. Launch other tools.
+ In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs. + + ``` + docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0 + ``` + +5. Launch agent services
+ We provide two options for the `llm_engine` of the agents: (1) open-source LLMs and (2) OpenAI models via API calls. + + To use open-source LLMs on Gaudi2, run the commands below. + + ``` + cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi + bash launch_tgi_gaudi.sh + bash launch_agent_service_tgi_gaudi.sh + ``` + + To use OpenAI models, run the commands below. ``` - cd docker_compose/intel/cpu/xeon + cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon bash launch_agent_service_openai.sh ``` @@ -143,10 +167,12 @@ flowchart LR First look at logs of the agent docker containers: ``` -docker logs docgrader-agent-endpoint +# worker agent docker logs rag-agent-endpoint ``` ``` + +# supervisor agent docker logs react-agent-endpoint ``` @@ -170,4 +196,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app ## How to register your own tools with agent -You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy). +You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md). diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/README.md b/AgentQnA/docker_compose/intel/cpu/xeon/README.md new file mode 100644 index 0000000000..852a0476c6 --- /dev/null +++ b/AgentQnA/docker_compose/intel/cpu/xeon/README.md @@ -0,0 +1,3 @@ +# Deployment on Xeon + +We deploy the retrieval tool on Xeon. For LLMs, we support OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
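As a quick sanity check after either launch path, both agent endpoints can be queried directly. The sketch below mirrors the payload and response handling of `AgentQnA/tests/test.py` added later in this diff; the `ip_address` environment variable and ports 9095/9090 are taken from that script and the compose files:

```python
# Smoke-test the worker agent (port 9095) and supervisor agent (port 9090).
# Mirrors the request/response shape used by AgentQnA/tests/test.py.
import os

import requests

ip_address = os.getenv("ip_address", "localhost")
for port in ("9095", "9090"):  # worker agent, then supervisor agent
    url = f"http://{ip_address}:{port}/v1/chat/completions"
    response = requests.post(
        url,
        json={"query": "Tell me about Michael Jackson song thriller"},
        proxies={"http": ""},  # bypass any local http proxy
    )
    response.raise_for_status()
    print(f"[{port}] {response.json()['text']}")
```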
diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml b/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml index bac5bbc627..837f2a0871 100644 --- a/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml +++ b/AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml @@ -2,11 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 services: - worker-docgrader-agent: + worker-rag-agent: image: opea/agent-langchain:latest - container_name: docgrader-agent-endpoint + container_name: rag-agent-endpoint volumes: - - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/ - ${TOOLSET_PATH}:/home/user/tools/ ports: - "9095:9095" @@ -36,8 +35,9 @@ services: supervisor-react-agent: image: opea/agent-langchain:latest container_name: react-agent-endpoint + depends_on: + - worker-rag-agent volumes: - - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/ - ${TOOLSET_PATH}:/home/user/tools/ ports: - "9090:9090" diff --git a/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh b/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh index 6c2094cc8e..f35e60fd13 100644 --- a/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh +++ b/AgentQnA/docker_compose/intel/cpu/xeon/launch_agent_service_openai.sh @@ -7,7 +7,7 @@ export recursion_limit_worker=12 export recursion_limit_supervisor=10 export model="gpt-4o-mini-2024-07-18" export temperature=0 -export max_new_tokens=512 +export max_new_tokens=4096 export OPENAI_API_KEY=${OPENAI_API_KEY} export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions" export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool" diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 5200f757e3..6a9d0b4650 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -2,37 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 services: - tgi-server: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 - container_name: tgi-server - ports: - - "8085:80" - volumes: - - ${HF_CACHE_DIR}:/data - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - PT_HPU_ENABLE_LAZY_COLLECTIVES: true - ENABLE_HPU_GRAPH: true - LIMIT_HPU_GRAPH: true - USE_FLASH_ATTENTION: true - FLASH_ATTENTION_RECOMPUTE: true - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS} - worker-docgrader-agent: + worker-rag-agent: image: opea/agent-langchain:latest - container_name: docgrader-agent-endpoint - depends_on: - - tgi-server + container_name: rag-agent-endpoint volumes: # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/ - ${TOOLSET_PATH}:/home/user/tools/ @@ -41,7 +13,7 @@ services: ipc: host environment: ip_address: ${ip_address} - strategy: rag_agent + strategy: rag_agent_llama recursion_limit: ${recursion_limit_worker} llm_engine: tgi HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} @@ -66,8 +38,7 @@ services: image: opea/agent-langchain:latest container_name: react-agent-endpoint depends_on: - - tgi-server - - 
worker-docgrader-agent + - worker-rag-agent volumes: # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/ - ${TOOLSET_PATH}:/home/user/tools/ @@ -76,7 +47,7 @@ services: ipc: host environment: ip_address: ${ip_address} - strategy: react_langgraph + strategy: react_llama recursion_limit: ${recursion_limit_supervisor} llm_engine: tgi HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_tgi_gaudi.sh b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_tgi_gaudi.sh index f4154fb229..966a037974 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_tgi_gaudi.sh +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_agent_service_tgi_gaudi.sh @@ -15,7 +15,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct" export NUM_SHARDS=4 export LLM_ENDPOINT_URL="http://${ip_address}:8085" export temperature=0.01 -export max_new_tokens=512 +export max_new_tokens=4096 # agent related environment variables export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/ @@ -27,17 +27,3 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool" export CRAG_SERVER=http://${ip_address}:8080 docker compose -f compose.yaml up -d - -sleep 5s -echo "Waiting tgi gaudi ready" -n=0 -until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs tgi-server &> tgi-gaudi-service.log - n=$((n+1)) - if grep -q Connected tgi-gaudi-service.log; then - break - fi - sleep 5s -done -sleep 5s -echo "Service started successfully" diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh new file mode 100644 index 0000000000..75b2a9c7f4 --- /dev/null +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# LLM related environment variables +export HF_CACHE_DIR=${HF_CACHE_DIR} +ls $HF_CACHE_DIR +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct" +export NUM_SHARDS=4 + +docker compose -f tgi_gaudi.yaml up -d + +sleep 5s +echo "Waiting tgi gaudi ready" +n=0 +until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do + docker logs tgi-server &> tgi-gaudi-service.log + n=$((n+1)) + if grep -q Connected tgi-gaudi-service.log; then + break + fi + sleep 5s +done +sleep 5s +echo "Service started successfully" diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml new file mode 100644 index 0000000000..59c5671e15 --- /dev/null +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + tgi-server: + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 + container_name: tgi-server + ports: + - "8085:80" + volumes: + - ${HF_CACHE_DIR}:/data + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + PT_HPU_ENABLE_LAZY_COLLECTIVES: true + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true + runtime: habana + cap_add: + - SYS_NICE + ipc: host + 
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS} diff --git a/AgentQnA/tests/1_build_images.sh b/AgentQnA/tests/step1_build_images.sh similarity index 100% rename from AgentQnA/tests/1_build_images.sh rename to AgentQnA/tests/step1_build_images.sh diff --git a/AgentQnA/tests/2_start_retrieval_tool.sh b/AgentQnA/tests/step2_start_retrieval_tool.sh similarity index 100% rename from AgentQnA/tests/2_start_retrieval_tool.sh rename to AgentQnA/tests/step2_start_retrieval_tool.sh diff --git a/AgentQnA/tests/3_ingest_data_and_validate_retrieval.sh b/AgentQnA/tests/step3_ingest_data_and_validate_retrieval.sh similarity index 100% rename from AgentQnA/tests/3_ingest_data_and_validate_retrieval.sh rename to AgentQnA/tests/step3_ingest_data_and_validate_retrieval.sh diff --git a/AgentQnA/tests/4_launch_and_validate_agent_openai.sh b/AgentQnA/tests/step4_launch_and_validate_agent_openai.sh similarity index 100% rename from AgentQnA/tests/4_launch_and_validate_agent_openai.sh rename to AgentQnA/tests/step4_launch_and_validate_agent_openai.sh diff --git a/AgentQnA/tests/4_launch_and_validate_agent_tgi.sh b/AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh similarity index 64% rename from AgentQnA/tests/4_launch_and_validate_agent_tgi.sh rename to AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh index f7b36da2a3..fde46e0d5a 100644 --- a/AgentQnA/tests/4_launch_and_validate_agent_tgi.sh +++ b/AgentQnA/tests/step4_launch_and_validate_agent_tgi.sh @@ -17,6 +17,12 @@ if [ ! -d "$HF_CACHE_DIR" ]; then fi ls $HF_CACHE_DIR +function start_tgi(){ + echo "Starting tgi-gaudi server" + cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi + bash launch_tgi_gaudi.sh + +} function start_agent_and_api_server() { echo "Starting CRAG server" @@ -25,6 +31,7 @@ function start_agent_and_api_server() { echo "Starting Agent services" cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi bash launch_agent_service_tgi_gaudi.sh + sleep 10 } function validate() { @@ -43,18 +50,22 @@ function validate() { function validate_agent_service() { echo "----------------Test agent ----------------" - local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ - "query": "Tell me about Michael Jackson song thriller" - }') - local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint") - docker logs docgrader-agent-endpoint + # local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ + # "query": "Tell me about Michael Jackson song thriller" + # }') + export agent_port="9095" + local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py) + local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint") + docker logs rag-agent-endpoint if [ "$EXIT_CODE" == "1" ]; then exit 1 fi - local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ - "query": "Tell me about Michael Jackson song thriller" - }') + # local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ + # "query": "Tell me about Michael Jackson song thriller" + # }') + export agent_port="9090" + local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py) local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint") docker logs 
react-agent-endpoint if [ "$EXIT_CODE" == "1" ]; then @@ -64,6 +75,10 @@ function validate_agent_service() { } function main() { + echo "==================== Start TGI ====================" + start_tgi + echo "==================== TGI started ====================" + echo "==================== Start agent ====================" start_agent_and_api_server echo "==================== Agent started ====================" diff --git a/AgentQnA/tests/test.py b/AgentQnA/tests/test.py new file mode 100644 index 0000000000..f0ef934412 --- /dev/null +++ b/AgentQnA/tests/test.py @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import requests + + +def generate_answer_agent_api(url, prompt): + proxies = {"http": ""} + payload = { + "query": prompt, + } + response = requests.post(url, json=payload, proxies=proxies) + answer = response.json()["text"] + return answer + + +if __name__ == "__main__": + ip_address = os.getenv("ip_address", "localhost") + agent_port = os.getenv("agent_port", "9095") + url = f"http://{ip_address}:{agent_port}/v1/chat/completions" + prompt = "Tell me about Michael Jackson song thriller" + answer = generate_answer_agent_api(url, prompt) + print(answer) diff --git a/AgentQnA/tests/test_compose_on_gaudi.sh b/AgentQnA/tests/test_compose_on_gaudi.sh index efe1aeeecd..5f7e899dcf 100644 --- a/AgentQnA/tests/test_compose_on_gaudi.sh +++ b/AgentQnA/tests/test_compose_on_gaudi.sh @@ -19,7 +19,6 @@ function stop_crag() { function stop_agent_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi/ - # docker compose -f compose.yaml down container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2) for container_name in $container_list; do cid=$(docker ps -aq --filter "name=$container_name") @@ -28,11 +27,21 @@ function stop_agent_docker() { done } +function stop_tgi(){ + cd $WORKPATH/docker_compose/intel/hpu/gaudi/ + container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2) + for container_name in $container_list; do + cid=$(docker ps -aq --filter "name=$container_name") + echo "Stopping container $container_name" + if [[ ! 
-z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + done + +} + function stop_retrieval_tool() { echo "Stopping Retrieval tool" local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/ - # docker compose -f compose.yaml down container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2) for container_name in $container_list; do cid=$(docker ps -aq --filter "name=$container_name") @@ -43,25 +52,26 @@ function stop_retrieval_tool() { echo "workpath: $WORKPATH" echo "=================== Stop containers ====================" stop_crag +stop_tgi stop_agent_docker stop_retrieval_tool cd $WORKPATH/tests echo "=================== #1 Building docker images====================" -bash 1_build_images.sh +bash step1_build_images.sh echo "=================== #1 Building docker images completed====================" echo "=================== #2 Start retrieval tool====================" -bash 2_start_retrieval_tool.sh +bash step2_start_retrieval_tool.sh echo "=================== #2 Retrieval tool started====================" echo "=================== #3 Ingest data and validate retrieval====================" -bash 3_ingest_data_and_validate_retrieval.sh +bash step3_ingest_data_and_validate_retrieval.sh echo "=================== #3 Data ingestion and validation completed====================" echo "=================== #4 Start agent and API server====================" -bash 4_launch_and_validate_agent_tgi.sh +bash step4_launch_and_validate_agent_tgi.sh echo "=================== #4 Agent test passed ====================" echo "=================== #5 Stop agent and API server====================" @@ -70,4 +80,6 @@ stop_agent_docker stop_retrieval_tool echo "=================== #5 Agent and API server stopped====================" +echo y | docker system prune + echo "ALL DONE!" diff --git a/AgentQnA/tools/supervisor_agent_tools.yaml b/AgentQnA/tools/supervisor_agent_tools.yaml index 58110e5292..4b53cc9f9f 100644 --- a/AgentQnA/tools/supervisor_agent_tools.yaml +++ b/AgentQnA/tools/supervisor_agent_tools.yaml @@ -25,7 +25,7 @@ get_billboard_rank_date: args_schema: rank: type: int - description: song name + description: the rank of interest, for example 1 for top 1 date: type: str description: date diff --git a/AgentQnA/tools/worker_agent_tools.py b/AgentQnA/tools/worker_agent_tools.py index 1dfdb8409e..fded38ec3a 100644 --- a/AgentQnA/tools/worker_agent_tools.py +++ b/AgentQnA/tools/worker_agent_tools.py @@ -12,16 +12,31 @@ def search_knowledge_base(query: str) -> str: print(url) proxies = {"http": ""} payload = { - "text": query, + "messages": query, } response = requests.post(url, json=payload, proxies=proxies) print(response) - docs = response.json()["documents"] - context = "" - for i, doc in enumerate(docs): - if i == 0: - context = doc - else: - context += "\n" + doc - print(context) - return context + if "documents" in response.json(): + docs = response.json()["documents"] + context = "" + for i, doc in enumerate(docs): + if i == 0: + context = doc + else: + context += "\n" + doc + # print(context) + return context + elif "text" in response.json(): + return response.json()["text"] + elif "reranked_docs" in response.json(): + docs = response.json()["reranked_docs"] + context = "" + for i, doc in enumerate(docs): + if i == 0: + context = doc["text"] + else: + context += "\n" + doc["text"] + # print(context) + return context + else: + return "Error parsing response from the knowledge base." 
diff --git a/AudioQnA/Dockerfile b/AudioQnA/Dockerfile index e2273d381b..265c9c9b5d 100644 --- a/AudioQnA/Dockerfile +++ b/AudioQnA/Dockerfile @@ -18,7 +18,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt COPY ./audioqna.py /home/user/audioqna.py diff --git a/AudioQnA/Dockerfile.multilang b/AudioQnA/Dockerfile.multilang index c62cb04048..ef7c926975 100644 --- a/AudioQnA/Dockerfile.multilang +++ b/AudioQnA/Dockerfile.multilang @@ -18,7 +18,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py diff --git a/AudioQnA/benchmark/performance/README.md b/AudioQnA/benchmark/performance/README.md new file mode 100644 index 0000000000..3d1bbc1c92 --- /dev/null +++ b/AudioQnA/benchmark/performance/README.md @@ -0,0 +1,77 @@ +# AudioQnA Benchmarking + +This folder contains a collection of scripts to enable inference benchmarking by leveraging [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), a comprehensive benchmarking tool that provides throughput analysis for assessing inference performance. + +By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community. + +## Purpose + +We aim to run these benchmarks and share them with the OPEA community for three primary reasons: + +- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs. +- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case. +- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks, etc. + +## Metrics + +The benchmark will report the metrics below: + +- Number of Concurrent Requests +- End-to-End Latency: P50, P90, P99 (in milliseconds) +- End-to-End First Token Latency: P50, P90, P99 (in milliseconds) +- Average Next Token Latency (in milliseconds) +- Average Token Latency (in milliseconds) +- Requests Per Second (RPS) +- Output Tokens Per Second +- Input Tokens Per Second + +Results will be displayed in the terminal and saved as a CSV file named `1_stats.csv` for easy export to spreadsheets. + +## Getting Started + +We recommend using Kubernetes to deploy the AudioQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs. + +### Prerequisites + +- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md). + +- Ensure every node has direct internet access. +- Set up kubectl on the master node with access to the Kubernetes cluster. +- Install Python 3.8+ on the master node for running GenAIEval. +- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
+- Ensure that the container's ulimit can meet the number of requests. + +```bash +# The way to modify the containerd ulimit: +sudo systemctl edit containerd +# Add two lines: +[Service] +LimitNOFILE=65536:1048576 + +sudo systemctl daemon-reload; sudo systemctl restart containerd +``` + +### Test Steps + +Please deploy the AudioQnA service before benchmarking. + +##### Run Benchmark Test + +Before the benchmark, we can configure the number of test queries and the test output directory by: + +```bash +export USER_QUERIES="[128, 128, 128, 128]" +export TEST_OUTPUT_DIR="/tmp/benchmark_output" +``` + +And then run the benchmark by: + +```bash +bash benchmark.sh -n +``` + +The argument `-n` refers to the number of test nodes. + +##### Data Collection + +All test results will be saved in the folder `/tmp/benchmark_output`, configured by the environment variable `TEST_OUTPUT_DIR` in the previous steps. diff --git a/AudioQnA/benchmark/performance/benchmark.sh b/AudioQnA/benchmark/performance/benchmark.sh new file mode 100644 index 0000000000..2930c7753f --- /dev/null +++ b/AudioQnA/benchmark/performance/benchmark.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +deployment_type="k8s" +node_number=1 +service_port=8888 +query_per_node=128 + +benchmark_tool_path="$(pwd)/GenAIEval" + +usage() { + echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]" + echo " -d deployment_type AudioQnA deployment type, select between k8s and docker (default: k8s)" + echo " -n node_number Test node number, required only for k8s deployment_type, (default: 1)" + echo " -i service_ip AudioQnA service ip, required only for docker deployment_type" + echo " -p service_port AudioQnA service port, required only for docker deployment_type, (default: 8888)" + exit 1 +} + +while getopts ":d:n:i:p:" opt; do + case ${opt} in + d ) + deployment_type=$OPTARG + ;; + n ) + node_number=$OPTARG + ;; + i ) + service_ip=$OPTARG + ;; + p ) + service_port=$OPTARG + ;; + \? ) + echo "Invalid option: -$OPTARG" 1>&2 + usage + ;; + : ) + echo "Invalid option: -$OPTARG requires an argument" 1>&2 + usage + ;; + esac +done + +if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then + echo "Error: service_ip is required for docker deployment_type" 1>&2 + usage +fi + +if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then + echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2 +fi + +function main() { + if [[ ! -d ${benchmark_tool_path} ]]; then + echo "Benchmark tool not found, setting up..." + setup_env + fi + run_benchmark +} + +function setup_env() { + git clone https://github.com/opea-project/GenAIEval.git + pushd ${benchmark_tool_path} + python3 -m venv stress_venv + source stress_venv/bin/activate + pip install -r requirements.txt + popd +} + +function run_benchmark() { + source ${benchmark_tool_path}/stress_venv/bin/activate + export DEPLOYMENT_TYPE=${deployment_type} + export SERVICE_IP=${service_ip:-"None"} + export SERVICE_PORT=${service_port:-"None"} + if [[ -z $USER_QUERIES ]]; then + user_query=$((query_per_node*node_number)) + export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]" + echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
+ fi + export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//') + if [[ -z $WARMUP ]]; then export WARMUP=0; fi + if [[ -z $TEST_OUTPUT_DIR ]]; then + if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then + export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}" + else + export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker" + fi + echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}." + fi + + envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml + cd ${benchmark_tool_path}/evals/benchmark + python benchmark.py +} + +main diff --git a/AudioQnA/benchmark/performance/benchmark.yaml b/AudioQnA/benchmark/performance/benchmark.yaml new file mode 100644 index 0000000000..659a99a759 --- /dev/null +++ b/AudioQnA/benchmark/performance/benchmark.yaml @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +test_suite_config: # Overall configuration settings for the test suite + examples: ["audioqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna + deployment_type: "k8s" # Default is "k8s", can also be "docker" + service_ip: None # Leave as None for k8s, specify for Docker + service_port: None # Leave as None for k8s, specify for Docker + warm_ups: 0 # Number of test requests for warm-up + run_time: 60m # The max total run time for the test suite + seed: # The seed for all RNGs + user_queries: [1, 2, 4, 8, 16, 32, 64, 128] # Number of test requests at each concurrency level + query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default. + random_prompt: false # Use random prompts if true, fixed prompts if false + collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false + data_visualization: false # Generate data visualization if true, do not generate data visualization if false + llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test + test_output_dir: "/tmp/benchmark_output" # The directory to store the test output + load_shape: # Tenant concurrency pattern + name: constant # poisson or constant (locust default load shape) + params: # Loadshape-specific parameters + constant: # Constant load shape specific parameters, activate only if load_shape is constant + concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users + poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson + arrival-rate: 1.0 # Request arrival rate + namespace: "" # Fill the user-defined namespace. Otherwise, it will be default.
+ +test_cases: + audioqna: + asr: + run_test: true + service_name: "asr-svc" # Replace with your service name + llm: + run_test: true + service_name: "llm-svc" # Replace with your service name + parameters: + model_name: "Intel/neural-chat-7b-v3-3" + max_new_tokens: 128 + temperature: 0.01 + top_k: 10 + top_p: 0.95 + repetition_penalty: 1.03 + streaming: true + llmserve: + run_test: true + service_name: "llm-svc" # Replace with your service name + tts: + run_test: true + service_name: "tts-svc" # Replace with your service name + e2e: + run_test: true + service_name: "audioqna-backend-server-svc" # Replace with your service name diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml index a0ef81d172..ea3c45b919 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -41,7 +41,7 @@ services: environment: TTS_ENDPOINT: ${TTS_ENDPOINT} tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "3006:80" diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml index d8ca1d7f8c..3e20dbc4af 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml @@ -26,7 +26,7 @@ services: https_proxy: ${https_proxy} restart: unless-stopped tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "3006:80" diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml index c3f885fcee..b536522c4f 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -51,7 +51,7 @@ services: environment: TTS_ENDPOINT: ${TTS_ENDPOINT} tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "3006:80" diff --git a/AudioQnA/kubernetes/intel/README.md b/AudioQnA/kubernetes/intel/README.md index 27948ed8b7..07bc6c1a13 100644 --- a/AudioQnA/kubernetes/intel/README.md +++ b/AudioQnA/kubernetes/intel/README.md @@ -7,14 +7,14 @@ ## Deploy On Xeon ``` -cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifests +cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml kubectl apply -f audioqna.yaml ``` ## Deploy On Gaudi ``` -cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifests +cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml kubectl apply -f audioqna.yaml diff --git a/AudioQnA/kubernetes/intel/README_gmc.md b/AudioQnA/kubernetes/intel/README_gmc.md index 30d879e196..767fdf3667 100644 --- a/AudioQnA/kubernetes/intel/README_gmc.md +++ b/AudioQnA/kubernetes/intel/README_gmc.md @@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment Should you desire to use the Gaudi accelerator, two alternate images are used for the 
embedding and llm services. For Gaudi: -- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5 +- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6 - whisper-gaudi: opea/whisper-gaudi:latest - speecht5-gaudi: opea/speecht5-gaudi:latest diff --git a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml index bd76774835..6856d2b878 100644 --- a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml +++ b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml @@ -247,7 +247,7 @@ spec: - envFrom: - configMapRef: name: audio-qna-config - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml b/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml index 2d0c567e3a..6659a7811a 100644 --- a/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml +++ b/AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml @@ -271,7 +271,7 @@ spec: - envFrom: - configMapRef: name: audio-qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 name: llm-dependency-deploy-demo securityContext: capabilities: diff --git a/AudioQnA/tests/test_compose_on_gaudi.sh b/AudioQnA/tests/test_compose_on_gaudi.sh index 69270736d6..e626b2671a 100644 --- a/AudioQnA/tests/test_compose_on_gaudi.sh +++ b/AudioQnA/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/AudioQnA/tests/test_compose_on_xeon.sh b/AudioQnA/tests/test_compose_on_xeon.sh index b36b5c7de9..926a51a33f 100644 --- a/AudioQnA/tests/test_compose_on_xeon.sh +++ b/AudioQnA/tests/test_compose_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="audioqna whisper asr llm-tgi speecht5 tts" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/AudioQnA/ui/docker/Dockerfile b/AudioQnA/ui/docker/Dockerfile index ac2bb7da31..1d5115f4b5 100644 --- a/AudioQnA/ui/docker/Dockerfile +++ b/AudioQnA/ui/docker/Dockerfile @@ -23,4 +23,4 @@ RUN npm run build EXPOSE 5173 # Run the front-end application in preview mode -CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] \ No newline at end of file +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/AudioQnA/ui/svelte/src/app.postcss b/AudioQnA/ui/svelte/src/app.postcss index c3e0519c6a..4b957234dc 100644 --- a/AudioQnA/ui/svelte/src/app.postcss +++ b/AudioQnA/ui/svelte/src/app.postcss @@ -79,4 +79,4 @@ a.btn { .w-12\/12 { width: 100% -} \ No newline at end of file +} diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/1.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/1.svg index 38adea6ffc..71ac8d5fcc 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/1.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/1.svg @@ -89,4 +89,4 @@ - \ No newline at end of file + diff --git 
a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/2.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/2.svg index 0e6150e4ae..95d4056589 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/2.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/2.svg @@ -89,4 +89,4 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/3.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/3.svg index 3ed7f7fc5b..310d437e5d 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/3.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/3.svg @@ -76,4 +76,4 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/4.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/4.svg index 2b34e86b01..f3281671de 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/4.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/4.svg @@ -76,4 +76,4 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/5.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/5.svg index 718f3b304b..8a5864192d 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/5.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/5.svg @@ -89,4 +89,4 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/stop-recording.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/stop-recording.svg index 4f4e638bab..82e497ab04 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/stop-recording.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/stop-recording.svg @@ -3,4 +3,4 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/upload.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/upload.svg index 55790f05f7..5264818ebf 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/upload.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/upload.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voice.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voice.svg index 0cc1d520e4..2d1375e1a5 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voice.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voice.svg @@ -6,4 +6,4 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOff.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOff.svg index 8161062a4c..fe9b59ee83 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOff.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOff.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOn.svg b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOn.svg index aeb96fabea..eca1441d15 100644 --- a/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOn.svg +++ b/AudioQnA/ui/svelte/src/lib/assets/icons/svg/voiceOn.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/AvatarChatbot/.gitignore b/AvatarChatbot/.gitignore index 8ad440e683..84dc2308b2 100644 --- a/AvatarChatbot/.gitignore +++ b/AvatarChatbot/.gitignore @@ -4,3 +4,5 @@ *.log docker_compose/intel/cpu/xeon/data docker_compose/intel/hpu/gaudi/data +inputs/ +outputs/ diff --git a/AvatarChatbot/README.md b/AvatarChatbot/README.md index ed0e29e8c1..32b387428a 100644 --- a/AvatarChatbot/README.md +++ b/AvatarChatbot/README.md @@ -75,7 +75,7 @@ The AvatarChatbot service can be deployed on either Intel Gaudi2 AI Accelerator ### Deploy AvatarChatbot on Gaudi -Refer to the [Gaudi 
Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AvatarChatbot on Gaudi. +Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AvatarChatbot on Gaudi, and on setting up an UI for the application. ### Deploy AvatarChatbot on Xeon diff --git a/AvatarChatbot/assets/img/UI.png b/AvatarChatbot/assets/img/UI.png new file mode 100644 index 0000000000..c78fe3bea8 Binary files /dev/null and b/AvatarChatbot/assets/img/UI.png differ diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md index 02e64adc92..f0b36c94bb 100644 --- a/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md +++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/README.md @@ -96,9 +96,9 @@ export ANIMATION_SERVICE_PORT=3008 ```bash export DEVICE="cpu" export WAV2LIP_PORT=7860 -export INFERENCE_MODE='wav2lip+gfpgan' +export INFERENCE_MODE='wav2lip_only' export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth' -export FACE="assets/img/avatar5.png" +export FACE="assets/img/avatar1.jpg" # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None' export AUDIO='None' export FACESIZE=96 @@ -188,13 +188,16 @@ The output file will be saved in the current working directory, as `${PWD}` is m ## Gradio UI -Follow the instructions in [Build Mega Service of AudioQnA on Gaudi](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker_compose/intel/hpu/gaudi/README.md) to build necessary Docker images and start the AudioQnA MegaService with the endpoint `http://localhost:3008/v1/audioqna`. Then run the following command to start the Gradio UI: - ```bash -cd GenAIExamples/AvatarChatbot/docker/ui/gradio -python3 app_gradio_demo.py +cd $WORKPATH/GenAIExamples/AvatarChatbot +python3 ui/gradio/app_gradio_demo_avatarchatbot.py ``` +The UI can be viewed at http://${host_ip}:7861 +UI Example +In the current version v1.0, you need to set the avatar figure image/video and the DL model choice in the environment variables before starting AvatarChatbot backend service and running the UI. Please just customize the audio question in the UI. 
+\*\* We will enable change of avatar figure between runs in v2.0 + ## Troubleshooting ```bash diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml index aa6f49bf87..2496b11e87 100644 --- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml +++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml @@ -42,7 +42,7 @@ services: environment: TTS_ENDPOINT: ${TTS_ENDPOINT} tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "3006:80" diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md index f84d763efb..b35726f63d 100644 --- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md +++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md @@ -96,9 +96,9 @@ export ANIMATION_SERVICE_PORT=3008 ```bash export DEVICE="hpu" export WAV2LIP_PORT=7860 -export INFERENCE_MODE='wav2lip+gfpgan' +export INFERENCE_MODE='wav2lip_only' export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth' -export FACE="assets/img/avatar5.png" +export FACE="assets/img/avatar1.jpg" # export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None' export AUDIO='None' export FACESIZE=96 @@ -188,14 +188,25 @@ The output file will be saved in the current working directory, as `${PWD}` is m ## Gradio UI -Follow the instructions in [Build Mega Service of AudioQnA on Gaudi](https://github.com/opea-project/GenAIExamples/blob/main/AudioQnA/docker_compose/intel/hpu/gaudi/README.md) to build necessary Docker images and start the AudioQnA MegaService with the endpoint `http://localhost:3008/v1/audioqna`. Then run the following command to start the Gradio UI: +```bash +sudo apt update +sudo apt install -y yasm pkg-config libx264-dev nasm +cd $WORKPATH +git clone https://github.com/FFmpeg/FFmpeg.git +cd FFmpeg +sudo ./configure --enable-gpl --enable-libx264 && sudo make -j$(nproc) && sudo make install && hash -r +pip install gradio==4.38.1 soundfile +``` ```bash -cd GenAIExamples/AvatarChatbot/docker/ui/gradio -python3 app_gradio_demo.py +cd $WORKPATH/GenAIExamples/AvatarChatbot +python3 ui/gradio/app_gradio_demo_avatarchatbot.py ``` -The UI can be viewed at http://${host_ip}:7861 +The UI can be viewed at http://${host_ip}:7861 UI Example +In the current version v1.0, you need to set the avatar figure image/video and the DL model choice in the environment variables before starting the AvatarChatbot backend service and running the UI. Please just customize the audio question in the UI.
+** Changing the avatar figure between runs will be enabled in v2.0.

 ## Troubleshooting

diff --git a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
index 06a7e4e054..2003bb4a99 100644
--- a/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -15,7 +15,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HABANA_VISIBLE_MODULES: all
+      HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
     runtime: habana
     cap_add:
@@ -39,7 +39,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HABANA_VISIBLE_MODULES: all
+      HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
     runtime: habana
     cap_add:
@@ -54,7 +54,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -67,7 +67,7 @@ services:
       HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_MODULES: all
+      HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       ENABLE_HPU_GRAPH: true
       LIMIT_HPU_GRAPH: true
@@ -105,7 +105,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HABANA_VISIBLE_MODULES: all
+      HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       DEVICE: ${DEVICE}
       INFERENCE_MODE: ${INFERENCE_MODE}
@@ -132,7 +132,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HABANA_VISIBLE_MODULES: all
+      HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
     runtime: habana
diff --git a/AvatarChatbot/tests/test_compose_on_gaudi.sh b/AvatarChatbot/tests/test_compose_on_gaudi.sh
old mode 100644
new mode 100755
index fc56194b0f..aab0e3b68b
--- a/AvatarChatbot/tests/test_compose_on_gaudi.sh
+++ b/AvatarChatbot/tests/test_compose_on_gaudi.sh
@@ -29,7 +29,7 @@ function build_docker_images() {
   service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation"
   docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
-  docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
+  docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
   docker images && sleep 1s
 }
@@ -74,7 +74,7 @@ function start_services() {
   export FPS=10
   # Start Docker Containers
-  docker compose up -d
+  docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
   n=0
   until [[ "$n" -ge 100 ]]; do
@@ -86,7 +86,6 @@
     n=$((n+1))
   done
-  # sleep 5m
   echo "All services are up and running"
   sleep 5s
 }
@@ -99,6 +98,7 @@ function validate_megaservice() {
   if [[ $result == *"mp4"* ]]; then
     echo "Result correct."
   else
+    echo "Result wrong, print docker logs."
     docker logs whisper-service > $LOG_PATH/whisper-service.log
     docker logs asr-service > $LOG_PATH/asr-service.log
     docker logs speecht5-service > $LOG_PATH/speecht5-service.log
     docker logs tts-service > $LOG_PATH/tts-service.log
     docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
     docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
     docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
     docker logs animation-gaudi-server > $LOG_PATH/animation-gaudi-server.log
-
-    echo "Result wrong."
+    echo "Exit test."
exit 1 fi } -#function validate_frontend() { - -#} - - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi docker compose down @@ -127,15 +121,17 @@ function stop_docker() { function main() { - stop_docker + echo y | docker builder prune --all + echo y | docker image prune + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi start_services # validate_microservices validate_megaservice # validate_frontend - stop_docker + stop_docker echo y | docker builder prune --all echo y | docker image prune diff --git a/AvatarChatbot/tests/test_compose_on_xeon.sh b/AvatarChatbot/tests/test_compose_on_xeon.sh old mode 100644 new mode 100755 index 1b1780a1b4..2bed682cfd --- a/AvatarChatbot/tests/test_compose_on_xeon.sh +++ b/AvatarChatbot/tests/test_compose_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/AvatarChatbot/ui/gradio/app_gradio_demo.py b/AvatarChatbot/ui/gradio/app_gradio_demo.py deleted file mode 100644 index 9317570e5c..0000000000 --- a/AvatarChatbot/ui/gradio/app_gradio_demo.py +++ /dev/null @@ -1,444 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import asyncio -import base64 -import io -import os -import shutil -import subprocess -import time - -import aiohttp -import gradio as gr -import numpy as np -import requests -import soundfile as sf -from PIL import Image - - -# %% AudioQnA functions -def preprocess_audio(audio): - """The audio data is a 16-bit integer array with values ranging from -32768 to 32767 and the shape of the audio data array is (samples,)""" - sr, y = audio - # Convert to normalized float32 audio - y = y.astype(np.float32) - y /= np.max(np.abs(y)) - # Convert the normalized float32 audio to a WAV file in memory - buf = io.BytesIO() - sf.write(buf, y, sr, format="WAV") - buf.seek(0) # Reset the buffer position to the beginning - # Encode the WAV file to base64 string - base64_bytes = base64.b64encode(buf.read()) - base64_string = base64_bytes.decode("utf-8") - return base64_string - - -def base64_to_int16(base64_string): - wav_bytes = base64.b64decode(base64_string) - buf = io.BytesIO(wav_bytes) - y, sr = sf.read(buf, dtype="int16") - return sr, y - - -async def transcribe(audio_input): - """Input: mic audio; Output: ai audio, text, text""" - global ai_chatbot_url, chat_history - chat_history = "" - # Preprocess the audio - base64bytestr = preprocess_audio(audio_input) - - # if not audio_choice: - # base64bytestr = preprocess_audio(audio_input) - # else: - # # convert wav file to base64 - # audio_index = int(audio_choice.split(".")[0]) - 1 - # audio_filepath = audio_filepaths[audio_index] - # audio_input.value = audio_filepath - # with open(audio_filepath, "rb") as file: - # base64bytestr = base64.b64encode(file.read()).decode('utf-8') - - # Send the audio to the backend server - initial_inputs = {"audio": base64bytestr, "max_tokens": 64} - - async with aiohttp.ClientSession() as session: - async with session.post(ai_chatbot_url, json=initial_inputs) as response: - # response = requests.post(ai_chatbot_url, json=initial_inputs) - - # Check the response status code - if response.status == 200: - response_json = await response.json() - # with open("response.txt", "w") as 
file: - # file.write(response) - - # Decode the base64 string - sampling_rate, audio_int16 = base64_to_int16(response_json["byte_str"]) - chat_history += f"User: {response_json['query']}\n\n" - - chat_ai = response_json["text"] - hitted_ends = [",", ".", "?", "!", "。", ";"] - last_punc_idx = max([chat_ai.rfind(punc) for punc in hitted_ends]) - if last_punc_idx != -1: - chat_ai = chat_ai[: last_punc_idx + 1] - chat_history += f"AI: {chat_ai}" - chat_history = chat_history.replace("OPEX", "OPEA") - return (sampling_rate, audio_int16) # handle the response - else: - return {"error": "Failed to transcribe audio", "status_code": response.status_code} - - -def resize_image(image_pil, size=(720, 720)): - """Resize the image to the specified size.""" - return image_pil.resize(size, Image.LANCZOS) - - -def resize_video(video_path, save_path, size=(720, 1280)): - """Resize the video to the specified size.""" - # command_resize_video = f"ffmpeg -y -i {video_path} -vf scale={size[0]}:{size[1]} {save_path}" - # subprocess.run(command_resize_video, shell=True) - - -# %% Wav2Lip functions -async def gen_video(image, audio, model_choice): - """Input: image (saved .png path), ai audio (saved .wav path); Output: video""" - # 0. Preprocess audio - # buf = io.BytesIO() - sr, y = audio - output_audio_save_path = "inputs/intermediate.wav" - sf.write(output_audio_save_path, y, sr, format="WAV") - - # 1. Set environment variables - match model_choice: - case "wav2lip": - os.environ["INFERENCE_MODE"] = "wav2lip_only" - os.environ["CHECKPOINT_PATH"] = "Wav2Lip/checkpoints/wav2lip.pth" - case "wav2lip+GAN": - os.environ["INFERENCE_MODE"] = "wav2lip_only" - os.environ["CHECKPOINT_PATH"] = "Wav2Lip/checkpoints/wav2lip_gan.pth" - case "wav2lip+GFPGAN": - os.environ["INFERENCE_MODE"] = "wav2lip+gfpgan" - os.environ["CHECKPOINT_PATH"] = "Wav2Lip/checkpoints/wav2lip.pth" - - # os.environ['INFERENCE_MODE'] = 'wav2lip_only' - # os.environ['CHECKPOINT_PATH'] = 'Wav2Lip/checkpoints/wav2lip_gan.pth' - os.environ["FACE"] = image # path to either an image or a video - os.environ["AUDIO"] = output_audio_save_path # path to .wav audio - # os.environ['AUDIO'] = audio - os.environ["FACESIZE"] = "96" - os.environ["OUTFILE"] = "outputs/result6.mp4" - os.environ["GFPGAN_MODEL_VERSION"] = "1.3" - os.environ["UPSCALE_FACTOR"] = "1" # int - # os.environ['FPS'] = '25.' # can be lower (e.g., 10) - os.environ["FPS"] = "10." # can be lower when using an image (e.g., 10) - - # 2. 
Run inference.sh bash script to perform Wav2Lip+GFPGAN inference - # Output video is saved at the path 'OUTFILE' - # command_wav2lip_gfpgan = "bash inference_vars.sh" - # subprocess.run(command_wav2lip_gfpgan, shell=True) - - outfile = os.environ.get("OUTFILE") - if os.path.exists(outfile): - res_video = outfile - else: - res_video = "inputs/loading.mp4" - return res_video - - -# %% AI Avatar demo function -# ctao 7/19 - make it asynchronous -async def aiavatar_demo(audio_input): - """Input: mic audio, image; Output: ai audio, text, text, ai video""" - # Include AudioQnA - output_audio = await transcribe(audio_input) # AudioQnA - - if isinstance(output_audio, dict): # in case of an error - return None, None - else: - sr, audio_int16 = output_audio - audio_file = "outputs/output_audio.wav" - sf.write(audio_file, audio_int16, sr) - # return audio_file, audio_file, image - return audio_file - - -async def final_update(audio, image, model_choice): - res_video = await gen_video(image, audio, model_choice) - return res_video - - -# %% Main -if __name__ == "__main__": - # HOST_IP = os.getenv("host_ip") - HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'", shell=True).decode("utf-8").strip() - - # Fetch the AudioQnA backend server - ai_chatbot_url = f"http://{HOST_IP}:3008/v1/audioqna" - - # Collect chat history to print in the interface - chat_history = "" - - # Prepare 3 image paths - # HOME = os.getenv("HOME") - # HOME="/mnt/localdisk4" - HOME = "/home/demo/" - image_pils = [ - Image.open(os.path.join("../assets/img/woman1.png")), - Image.open(os.path.join("../assets/img/man1.png")), - Image.open(os.path.join("../assets/img/woman2.png")), - ] - - video_paths = [ - os.path.join("../assets/video/man1.mp4"), - os.path.join("../assets/video/woman2.mp4"), - os.path.join("../assets/video/man4.mp4"), - ] - - def image_to_base64(image_path): - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8") - - # Convert your images to Base64 - # opea_qr_base64 = image_to_base64('../rfcs/opea_qr.png') - # opea_gh_qr_base64 = image_to_base64('../rfcs/opea_gh_qr.png') - xeon_base64 = image_to_base64("../rfcs/xeon.jpg") - gaudi_base64 = image_to_base64("../rfcs/gaudi.png") - - # List of prerecorded WAV files containing audio questions - audio_filepaths = [ - "../assets/audio/intel1.wav", - "../assets/audio/intel2.wav", - "../assets/audio/intel3.wav", - "../assets/audio/intel4.wav", - "../assets/audio/pnp1.wav", - "../assets/audio/pnp2.wav", - "../assets/audio/pnp3.wav", - "../assets/audio/pnp4.wav", - "../assets/audio/entertainment1.wav", - "../assets/audio/entertainment2.wav", - ] - audio_questions = [ - "1. What are the latest data center processor and AI accelerator products at Intel? Name them.", - "2. What's the objective of the Open Platform for Enterprise AI? How is it helpful to enterprises building AI solutions?", - "3. What is Intel's Gaudi 3 AI Accelerator performance compared to Nvidia H100?", - "4. What kinds of Intel AI tools are available to accelerate AI workloads?", - "5. What is Plug and Play Technology Center? Where is it located?", - "6. Tell us about inflation in the US in the past few years?", - "7. What is the difference between an index fund and a mutual fund?", - "8. What is the difference between pretax and roth retirement accounts?", - "9. Which team won the Superbowl in 2022?", - "10. 
In the Lord of the Rings, who threw the Ring into Mount Doom?", - ] - - # Demo frontend - demo = gr.Blocks() - with demo: - # Define processing functions - count = 0 - - def initial_process(audio_input): - global count, chat_history - start_time = time.time() - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - audio_file = loop.run_until_complete(aiavatar_demo(audio_input)) - count += 1 - end_time = time.time() - return audio_file, gr.State(value=str(count)), f"{(end_time - start_time):.1f} seconds", chat_history - - def final_process(audio, image, model_choice): - start_time = time.time() - # loop = asyncio.get_event_loop() - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - res_video = loop.run_until_complete(final_update(audio, image, model_choice)) - end_time = time.time() - return res_video, f"{(end_time - start_time):.1f} seconds" - - def update_selected_image_state(image_index): - selected_image_state.value = image_index - # change image_input here - if image_index < len(image_pils): - return f"inputs/face_{image_index}.png" - else: - return f"inputs/video_{image_index - len(image_pils)}.mp4" - - def update_audio_input(audio_choice): - if audio_choice: - audio_index = int(audio_choice.split(".")[0]) - 1 - audio_filepath_gradio = f"inputs/audio_{audio_index:d}.wav" - shutil.copyfile(audio_filepaths[audio_index], audio_filepath_gradio) - # audio_input.value = audio_filepath_gradio - return audio_filepath_gradio - - # UI Components - # Title & Introduction - gr.Markdown("

A PyTorch and OPEA based AI Avatar Audio Chatbot

") - # gr.Markdown("# **Using OPEA to implement a RAG-Powered Human-Like AI Avatar Audio Chatbot**") - with gr.Row(): - with gr.Column(scale=8): - gr.Markdown( - """ -

Welcome to our AI Avatar Audio Chatbot! This application leverages PyTorch and OPEA (Open Platform for Enterprise AI) v0.8 to provide you with a human-like conversational experience. It's run on Intel® Gaudi® AI Accelerator and Intel® Xeon® Processor, with hardware and software optimizations.
- Please feel free to interact with the AI avatar by choosing your own avatar and talking into the mic.

- """ - ) - with gr.Column(scale=1): - # with gr.Row(): - # gr.Markdown(f""" - # OPEA QR Code - # """, label="OPEA QR Code") - # gr.Markdown(f""" - # OPEA GitHub QR Code - # """, label="OPEA GitHub QR Code") - with gr.Row(): - gr.Markdown( - f""" - Intel®Gaudi""", - label="Intel®Gaudi", - ) - gr.Markdown( - f""" - Intel®Xeon""", - label="Intel®Xeon", - ) - gr.Markdown("
") # Divider - - # Inputs - # Image gallery - selected_image_state = gr.State(value=-1) - image_clicks = [] - image_click_buttons = [] - video_clicks = [] - video_click_buttons = [] - with gr.Row(): - with gr.Column(scale=1): - audio_input = gr.Audio(sources=None, format="wav", label="🎤 or 📤 for your Input audio!") - audio_choice = gr.Dropdown( - choices=audio_questions, - label="Choose an audio question", - value=None, # default value - ) - # Update audio_input when a selection is made from the dropdown - audio_choice.change(fn=update_audio_input, inputs=audio_choice, outputs=audio_input) - - face_input = gr.File( - file_count="single", - file_types=["image", "video"], - label="Choose an avatar or 📤 an image or video!", - ) - model_choice = gr.Dropdown( - choices=["wav2lip", "wav2lip+GAN", "wav2lip+GFPGAN"], - label="Choose a DL model", - ) - with gr.Column(scale=2): - # Display 3 images and buttons - with gr.Row(): - for i, image_pil in enumerate(image_pils): - image_pil = resize_image(image_pil) - save_path = f"inputs/face_{i}.png" - image_pil.save(save_path, "PNG") - image_clicks.append(gr.Image(type="filepath", value=save_path, label=f"Avatar {i+1}")) - with gr.Row(): - for i in range(len(image_pils)): - image_click_buttons.append(gr.Button(f"Use Image {i+1}")) - # Display 3 videos and buttons - with gr.Row(): - for i, video_path in enumerate(video_paths): - save_path = f"inputs/video_{i}.mp4" - # shutil.copyfile(video_path, save_path) - resize_video(video_path, save_path) - video_clicks.append(gr.Video(value=save_path, label=f"Video {i+1}")) - with gr.Row(): - for i in range(len(video_paths)): - video_click_buttons.append(gr.Button(f"Use Video {i+1}")) - - submit_button = gr.Button("Submit") - - # Outputs - gr.Markdown("
") # Divider - with gr.Row(): - with gr.Column(scale=1): - audio_output_interm = gr.Audio(label="🔊 Output audio", autoplay=True) - chat_history_box = gr.Textbox(label="Chat History", value=chat_history) - audio_time_text = gr.Textbox(label="Audio processing time", value="0.0 seconds") - with gr.Column(scale=2): - video_output = gr.Video(label="Your AI Avatar video: ", format="mp4", width=1280, height=720) - video_time_text = gr.Textbox(label="Video processing time", value="0.0 seconds") - - # Technical details - gr.Markdown("
") # Divider - with gr.Row(): - gr.Markdown( - """ -

-OPEA megaservice deployed:
-  • AvatarChatbot
-
-OPEA microservices deployed:
-  • ASR (service: opea/whisper-gaudi, model: openai/whisper-small)
-  • LLM 'text-generation' (service: opea/llm-tgi, model: Intel/neural-chat-7b-v3-3)
-  • TTS (service: opea/speecht5-gaudi, model: microsoft/speecht5_tts)
-  • Animation (service: opea/animation, model: wav2lip+gfpgan)
- """ - ) - #

OPEA's "AvatarChatbot" megaservice is composed of "ASR->LLM->TTS->Animation" microservices. It first generates an expert answer based on your query, and then animates the avatar figure with output audio. Feel free to interact with the AI avatar by choosing your own avatar and talking into the mic.

- with gr.Row(): - gr.Image("./flowchart_1.png", label="Megaservice Flowchart") - with gr.Row(): - gr.Markdown( - """ -

-The AI Avatar Audio Chatbot is powered by the following Intel® AI software:
-  • Intel Gaudi Software v1.17.0
-  • PyTorch v2.3.1 (Eager mode + torch.compile)
-  • HPU Graph
-  • Intel Neural Compressor (INC)
- """ - ) - - # Disclaimer - gr.Markdown("
") # Divider - gr.Markdown("

Notices & Disclaimers

") - gr.Markdown( - """ -

Intel is committed to respecting human rights and avoiding complicity in human rights abuses. See Intel's Global Human Rights Principles. Intel's products and software are intended only to be used in applications that do not cause or contribute to a violation of an internationally recognized human right.

-

© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.

-

You may not use or facilitate the use of this document in connection with any infringement or other legal analysis concerning Intel products described herein. You agree to grant Intel a non-exclusive, royalty-free license to any patent claim thereafter drafted which includes subject matter disclosed herein.

- """ - ) - - # States - interm_state = gr.State(value="initial") - - # State transitions - for i in range(len(image_pils)): - image_click_buttons[i].click( - update_selected_image_state, inputs=[gr.Number(value=i, visible=False)], outputs=[face_input] - ) - for i in range(len(video_paths)): - video_click_buttons[i].click( - update_selected_image_state, - inputs=[gr.Number(value=i + len(image_pils), visible=False)], - outputs=[face_input], - ) - # submit_button = gr.Button("Submit") - submit_button.click( - initial_process, - inputs=[audio_input], - outputs=[ - audio_output_interm, - interm_state, - audio_time_text, - chat_history_box, - ], # need to change interm_state - ) - interm_state.change( - final_process, - inputs=[audio_output_interm, face_input, model_choice], - outputs=[video_output, video_time_text], - ) - - demo.queue().launch(server_name="0.0.0.0", server_port=7861) diff --git a/AvatarChatbot/ui/gradio/app_gradio_demo_avatarchatbot.py b/AvatarChatbot/ui/gradio/app_gradio_demo_avatarchatbot.py new file mode 100644 index 0000000000..19817d5051 --- /dev/null +++ b/AvatarChatbot/ui/gradio/app_gradio_demo_avatarchatbot.py @@ -0,0 +1,349 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import base64 +import io +import os +import shutil +import subprocess +import time + +import aiohttp +import docker +import ffmpeg +import gradio as gr +import numpy as np +import soundfile as sf +from PIL import Image + + +# %% Docker Management +def update_env_var_in_container(container_name, env_var, new_value): + return + + +# %% AudioQnA functions +def preprocess_audio(audio): + """The audio data is a 16-bit integer array with values ranging from -32768 to 32767 and the shape of the audio data array is (samples,)""" + sr, y = audio + + # Convert to normalized float32 audio + y = y.astype(np.float32) + y /= np.max(np.abs(y)) + + # Save to memory + buf = io.BytesIO() + sf.write(buf, y, sr, format="WAV") + buf.seek(0) # Reset the buffer position to the beginning + + # Encode the WAV file to base64 string + base64_bytes = base64.b64encode(buf.read()) + base64_string = base64_bytes.decode("utf-8") + return base64_string + + +def base64_to_int16(base64_string): + wav_bytes = base64.b64decode(base64_string) + buf = io.BytesIO(wav_bytes) + y, sr = sf.read(buf, dtype="int16") + return sr, y + + +async def transcribe(audio_input, face_input, model_choice): + """Input: mic audio; Output: ai audio, text, text""" + global ai_chatbot_url, chat_history, count + chat_history = "" + # Preprocess the audio + base64bytestr = preprocess_audio(audio_input) + + # Send the audio to the AvatarChatbot backend server endpoint + initial_inputs = {"audio": base64bytestr, "max_tokens": 64} + + # TO-DO: update wav2lip-service with the chosen face_input + # update_env_var_in_container("wav2lip-service", "DEVICE", "new_device_value") + + async with aiohttp.ClientSession() as session: + async with session.post(ai_chatbot_url, json=initial_inputs) as response: + + # Check the response status code + if response.status == 200: + # response_json = await response.json() + # # Decode the base64 string + # sampling_rate, audio_int16 = base64_to_int16(response_json["byte_str"]) + # chat_history += f"User: {response_json['query']}\n\n" + # chat_ai = response_json["text"] + # hitted_ends = [",", ".", "?", "!", "。", ";"] + # last_punc_idx = max([chat_ai.rfind(punc) for punc in hitted_ends]) + # if last_punc_idx != -1: + # chat_ai = chat_ai[: last_punc_idx + 1] + # chat_history += f"AI: 
{chat_ai}" + # chat_history = chat_history.replace("OPEX", "OPEA") + # return (sampling_rate, audio_int16) # handle the response + + result = await response.text() + return "docker_compose/intel/hpu/gaudi/result.mp4" + else: + return {"error": "Failed to transcribe audio", "status_code": response.status_code} + + +def resize_image(image_pil, size=(720, 720)): + """Resize the image to the specified size.""" + return image_pil.resize(size, Image.LANCZOS) + + +def resize_video(video_path, save_path, size=(720, 1280)): + """Resize the video to the specified size, and save to the save path.""" + ffmpeg.input(video_path).output(save_path, vf=f"scale={size[0]}:{size[1]}").overwrite_output().run() + + +# %% AI Avatar demo function +async def aiavatar_demo(audio_input, face_input, model_choice): + """Input: mic/preloaded audio, avatar file path; + Output: ai video""" + # Wait for response from AvatarChatbot backend + output_video = await transcribe(audio_input, face_input, model_choice) # output video path + + if isinstance(output_video, dict): # in case of an error + return None, None + else: + return output_video + + +# %% Main +if __name__ == "__main__": + # HOST_IP = os.getenv("host_ip") + HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'", shell=True).decode("utf-8").strip() + + # Fetch the AudioQnA backend server + ai_chatbot_url = f"http://{HOST_IP}:3009/v1/avatarchatbot" + + # Collect chat history to print in the interface + chat_history = "" + + # Prepare 3 image paths and 3 video paths + # image_pils = [ + # Image.open(os.path.join("assets/img/woman1.png")), + # Image.open(os.path.join("assets/img/man1.png")), + # Image.open(os.path.join("assets/img/woman2.png")), + # ] + + # video_paths = [ + # os.path.join("assets/video/man1.mp4"), + # os.path.join("assets/video/woman2.mp4"), + # os.path.join("assets/video/man4.mp4"), + # ] + + def image_to_base64(image_path): + with open(image_path, "rb") as image_file: + return base64.b64encode(image_file.read()).decode("utf-8") + + # Convert your images to Base64 + xeon_base64 = image_to_base64("assets/img/xeon.jpg") + gaudi_base64 = image_to_base64("assets/img/gaudi.png") + + # List of prerecorded WAV files containing audio questions + # audio_filepaths = [ + # "assets/audio/intel2.wav", + # "assets/audio/intel4.wav", + # ] + # audio_questions = [ + # "1. What's the objective of the Open Platform for Enterprise AI? How is it helpful to enterprises building AI solutions?", + # "2. 
What kinds of Intel AI tools are available to accelerate AI workloads?", + # ] + + # Demo frontend + demo = gr.Blocks() + with demo: + # Define processing functions + count = 0 + + # Make necessary folders: + if not os.path.exists("inputs"): + os.makedirs("inputs") + if not os.path.exists("outputs"): + os.makedirs("outputs") + + def initial_process(audio_input, face_input, model_choice): + global count + start_time = time.time() + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + video_file = loop.run_until_complete(aiavatar_demo(audio_input, face_input, model_choice)) + count += 1 + end_time = time.time() + return video_file, f"The entire application took {(end_time - start_time):.1f} seconds" + + # def update_selected_image_state(image_index): + # image_index = int(image_index) + # selected_image_state.value = image_index + # # change image_input here + # if image_index < len(image_pils): + # return f"inputs/face_{image_index}.png" + # else: + # return f"inputs/video_{image_index - len(image_pils)}.mp4" + + # def update_audio_input(audio_choice): + # if audio_choice: + # audio_index = int(audio_choice.split(".")[0]) - 1 + # audio_filepath_gradio = f"inputs/audio_{audio_index:d}.wav" + # shutil.copyfile(audio_filepaths[audio_index], audio_filepath_gradio) + # return audio_filepath_gradio + + # UI Components + # Title & Introduction + gr.Markdown("

A PyTorch and OPEA based AI Avatar Audio Chatbot

") + with gr.Row(): + with gr.Column(scale=8): + gr.Markdown( + """ +

Welcome to our AI Avatar Audio Chatbot! This application leverages PyTorch and OPEA (Open Platform for Enterprise AI) v0.8 to provide you with a human-like conversational experience. It's run on Intel® Gaudi® AI Accelerator and Intel® Xeon® Processor, with hardware and software optimizations.
+ Please feel free to interact with the AI avatar by choosing your own avatar and talking into the mic.

+ """ + ) + with gr.Column(scale=1): + # with gr.Row(): + # gr.Markdown(f""" + # OPEA QR Code + # """, label="OPEA QR Code") + # gr.Markdown(f""" + # OPEA GitHub QR Code + # """, label="OPEA GitHub QR Code") + with gr.Row(): + gr.Markdown( + f""" + Intel®Gaudi""", + label="Intel®Gaudi", + ) + gr.Markdown( + f""" + Intel®Xeon""", + label="Intel®Xeon", + ) + gr.Markdown("
") # Divider + + # Inputs + # Image gallery + selected_image_state = gr.State(value=-1) + image_clicks = [] + image_click_buttons = [] + video_clicks = [] + video_click_buttons = [] + with gr.Row(): + with gr.Column(scale=1): + audio_input = gr.Audio( + sources=["upload", "microphone"], format="wav", label="🎤 or 📤 for your Input audio!" + ) + # audio_choice = gr.Dropdown( + # choices=audio_questions, + # label="Choose an audio question", + # value=None, # default value + # ) + # Update audio_input when a selection is made from the dropdown + # audio_choice.change(fn=update_audio_input, inputs=audio_choice, outputs=audio_input) + + face_input = gr.File( + file_count="single", + file_types=["image", "video"], + label="Choose an avatar or 📤 an image or video!", + ) + model_choice = gr.Dropdown( + choices=["wav2lip", "wav2lip+GAN", "wav2lip+GFPGAN"], + label="Choose a DL model", + ) + # with gr.Column(scale=2): + # # Display 3 images and buttons + # with gr.Row(): + # for i, image_pil in enumerate(image_pils): + # image_pil = resize_image(image_pil) + # save_path = f"inputs/face_{int(i)}.png" + # image_pil.save(save_path, "PNG") + # image_clicks.append(gr.Image(type="filepath", value=save_path, label=f"Avatar {int(i)+1}")) + # with gr.Row(): + # for i in range(len(image_pils)): + # image_click_buttons.append(gr.Button(f"Use Image {i+1}")) + + # # Display 3 videos and buttons + # with gr.Row(): + # for i, video_path in enumerate(video_paths): + # save_path = f"inputs/video_{int(i)}.mp4" + # resize_video(video_path, save_path) + # video_clicks.append(gr.Video(value=save_path, label=f"Video {int(i)+1}")) + # with gr.Row(): + # for i in range(len(video_paths)): + # video_click_buttons.append(gr.Button(f"Use Video {int(i)+1}")) + + submit_button = gr.Button("Submit") + + # Outputs + gr.Markdown("
") # Divider + with gr.Row(): + with gr.Column(): + video_output = gr.Video(label="Your AI Avatar video: ", format="mp4", width=1280, height=720) + video_time_text = gr.Textbox(label="Video processing time", value="0.0 seconds") + + # Technical details + gr.Markdown("
") # Divider + with gr.Row(): + gr.Markdown( + """ +

+OPEA megaservice deployed:
+  • AvatarChatbot
+
+OPEA microservices deployed:
+  • ASR (service: opea/whisper-gaudi, model: openai/whisper-small)
+  • LLM 'text-generation' (service: opea/llm-tgi, model: Intel/neural-chat-7b-v3-3)
+  • TTS (service: opea/speecht5-gaudi, model: microsoft/speecht5_tts)
+  • Animation (service: opea/animation, model: wav2lip+gfpgan)
+ """ + ) + with gr.Row(): + gr.Image("assets/img/flowchart.png", label="Megaservice Flowchart") + with gr.Row(): + gr.Markdown( + """ +

+The AI Avatar Audio Chatbot is powered by the following Intel® AI software:
+  • Intel Gaudi Software v1.17.0
+  • PyTorch v2.3.1 (Eager mode + torch.compile)
+  • HPU Graph
+  • Intel Neural Compressor (INC)
+ """ + ) + + # Disclaimer + gr.Markdown("
") # Divider + gr.Markdown("

Notices & Disclaimers

") + gr.Markdown( + """ +

Intel is committed to respecting human rights and avoiding complicity in human rights abuses. See Intel's Global Human Rights Principles. Intel's products and software are intended only to be used in applications that do not cause or contribute to a violation of an internationally recognized human right.

+

© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.

+

You may not use or facilitate the use of this document in connection with any infringement or other legal analysis concerning Intel products described herein. You agree to grant Intel a non-exclusive, royalty-free license to any patent claim thereafter drafted which includes subject matter disclosed herein.

+ """ + ) + + # State transitions + # for i in range(len(image_pils)): + # image_click_buttons[i].click( + # update_selected_image_state, inputs=[gr.Number(value=i, visible=False)], outputs=[face_input] + # ) + # for i in range(len(video_paths)): + # video_click_buttons[i].click( + # update_selected_image_state, + # inputs=[gr.Number(value=i + len(image_pils), visible=False)], + # outputs=[face_input], + # ) + submit_button.click( + initial_process, + inputs=[audio_input, face_input, model_choice], + outputs=[ + video_output, + video_time_text, + ], + ) + + demo.queue().launch(server_name="0.0.0.0", server_port=7861) diff --git a/ChatQnA/Dockerfile b/ChatQnA/Dockerfile index ee84069a25..4e431ac773 100644 --- a/ChatQnA/Dockerfile +++ b/ChatQnA/Dockerfile @@ -18,7 +18,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \ pip install --no-cache-dir langchain_core diff --git a/ChatQnA/Dockerfile.guardrails b/ChatQnA/Dockerfile.guardrails index 168dfb138a..ed811148c0 100644 --- a/ChatQnA/Dockerfile.guardrails +++ b/ChatQnA/Dockerfile.guardrails @@ -18,7 +18,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \ pip install --no-cache-dir langchain_core diff --git a/ChatQnA/Dockerfile.without_rerank b/ChatQnA/Dockerfile.without_rerank index 030aef1594..7d3a94c5de 100644 --- a/ChatQnA/Dockerfile.without_rerank +++ b/ChatQnA/Dockerfile.without_rerank @@ -18,7 +18,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \ pip install --no-cache-dir langchain_core diff --git a/ChatQnA/Dockerfile.wrapper b/ChatQnA/Dockerfile.wrapper new file mode 100644 index 0000000000..c06a6811bd --- /dev/null +++ b/ChatQnA/Dockerfile.wrapper @@ -0,0 +1,34 @@ + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + git + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +WORKDIR /home/user/ +RUN git clone https://github.com/opea-project/GenAIComps.git + +WORKDIR /home/user/GenAIComps +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt + +COPY ./chatqna_wrapper.py /home/user/chatqna.py + +ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps + +USER user + +WORKDIR /home/user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "chatqna.py"] diff --git a/ChatQnA/README.md b/ChatQnA/README.md index 24569cc746..e3daf68508 100644 --- a/ChatQnA/README.md +++ b/ChatQnA/README.md @@ -4,7 +4,26 @@ Chatbots are the most widely adopted use case for leveraging the powerful chat a RAG bridges the knowledge gap by dynamically fetching relevant information from external sources, ensuring that 
responses generated remain factual and current. At the core of this architecture are vector databases, which are instrumental in enabling efficient and semantic retrieval of information. These databases store data as vectors, allowing RAG to swiftly access the most pertinent documents or data points based on semantic similarity.

-## Deploy ChatQnA Service
+## 🤖 Automated Terraform Deployment using Intel® Optimized Cloud Modules for **Terraform**
+
+| Cloud Provider       | Intel Architecture                | Intel Optimized Cloud Module for Terraform                                                                                         | Comments                                                             |
+| -------------------- | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------- |
+| AWS                  | 4th Gen Intel Xeon with Intel AMX | [AWS Module](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna)                           | Uses Intel/neural-chat-7b-v3-3 by default                            |
+| AWS Falcon2-11B      | 4th Gen Intel Xeon with Intel AMX | [AWS Module with Falcon11B](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna-falcon11B) | Uses TII Falcon2-11B LLM Model                                       |
+| GCP                  | 5th Gen Intel Xeon with Intel AMX | [GCP Module](https://github.com/intel/terraform-intel-gcp-vm/tree/main/examples/gen-ai-xeon-opea-chatqna)                           | Also supports Confidential AI by using Intel® TDX with 4th Gen Xeon |
+| Azure                | 5th Gen Intel Xeon with Intel AMX | Work-in-progress                                                                                                                     | Work-in-progress                                                     |
+| Intel Tiber AI Cloud | 5th Gen Intel Xeon with Intel AMX | Work-in-progress                                                                                                                     | Work-in-progress                                                     |
+
+## Automated Deployment to Ubuntu-based systems (if not using Terraform) using Intel® Optimized Cloud Modules for **Ansible**
+
+To deploy to an existing Xeon Ubuntu-based system, use our Intel Optimized Cloud Modules for Ansible. This is the same Ansible playbook used by Terraform.
+Use this if you are not using Terraform and have provisioned your system with another tool or manually, including bare metal.
+
+| Operating System | Intel Optimized Cloud Module for Ansible                                                                          |
+| ---------------- | ------------------------------------------------------------------------------------------------------------------ |
+| Ubuntu 20.04     | [ChatQnA Ansible Module](https://github.com/intel/optimized-cloud-recipes/tree/main/recipes/ai-opea-chatqna-xeon) |
+| Ubuntu 22.04     | Work-in-progress                                                                                                   |
+
+## Manually Deploy ChatQnA Service

The ChatQnA service can be effortlessly deployed on Intel Gaudi2, Intel Xeon Scalable Processors and Nvidia GPU.
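To get a concrete feel for the embedding step behind that retrieval, here is a hedged sketch against a TEI embedding service; the `{"inputs": ...}` payload and the `/embed` route match the call used by `eval_multihop.py` in the accuracy benchmark below, while the host and port are assumptions:

```bash
# Illustrative: turn a query into a vector with a TEI server. The response
# is a list of embeddings, one per input, so the query's vector is element [0].
curl http://localhost:8090/embed \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"inputs": "What is OPEA?"}'
```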
diff --git a/ChatQnA/benchmark/accuracy/README.md b/ChatQnA/benchmark/accuracy/README.md index 0cfae4564b..c073139486 100644 --- a/ChatQnA/benchmark/accuracy/README.md +++ b/ChatQnA/benchmark/accuracy/README.md @@ -48,7 +48,7 @@ To setup a LLM model, we can use [tgi-gaudi](https://github.com/huggingface/tgi- docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.1 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 # for better performance, set `PREFILL_BATCH_BUCKET_SIZE`, `BATCH_BUCKET_SIZE`, `max-batch-total-tokens`, `max-batch-prefill-tokens` -docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048 +docker run -p {your_llm_port}:80 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HF_TOKEN={your_hf_token} -e PREFILL_BATCH_BUCKET_SIZE=1 -e BATCH_BUCKET_SIZE=8 --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id mistralai/Mixtral-8x7B-Instruct-v0.1 --max-input-tokens 2048 --max-total-tokens 4096 --sharded true --num-shard 2 --max-batch-total-tokens 65536 --max-batch-prefill-tokens 2048 ``` ### Prepare Dataset diff --git a/ChatQnA/benchmark/accuracy/eval_multihop.py b/ChatQnA/benchmark/accuracy/eval_multihop.py index 9b07ea2e34..a8f2b9911a 100644 --- a/ChatQnA/benchmark/accuracy/eval_multihop.py +++ b/ChatQnA/benchmark/accuracy/eval_multihop.py @@ -41,11 +41,11 @@ def get_reranked_documents(self, query, docs, arguments): return [] def get_retrieved_documents(self, query, arguments): - data = {"text": query} + data = {"inputs": query} headers = {"Content-Type": "application/json"} - response = requests.post(arguments.embedding_endpoint, data=json.dumps(data), headers=headers) + response = requests.post(arguments.tei_embedding_endpoint + "/embed", data=json.dumps(data), headers=headers) if response.ok: - embedding = response.json()["embedding"] + embedding = response.json()[0] else: print(f"Request for embedding failed due to {response.text}.") return [] diff --git a/ChatQnA/benchmark/performance/README.md b/ChatQnA/benchmark/performance-deprecated/README.md similarity index 100% rename from ChatQnA/benchmark/performance/README.md rename to ChatQnA/benchmark/performance-deprecated/README.md diff --git a/ChatQnA/benchmark/performance/benchmark.sh b/ChatQnA/benchmark/performance-deprecated/benchmark.sh similarity index 100% rename from ChatQnA/benchmark/performance/benchmark.sh rename to ChatQnA/benchmark/performance-deprecated/benchmark.sh diff --git a/ChatQnA/benchmark/performance/benchmark.yaml b/ChatQnA/benchmark/performance-deprecated/benchmark.yaml similarity index 100% rename from ChatQnA/benchmark/performance/benchmark.yaml rename to ChatQnA/benchmark/performance-deprecated/benchmark.yaml diff --git a/ChatQnA/benchmark/performance/helm_charts/.helmignore 
b/ChatQnA/benchmark/performance-deprecated/helm_charts/.helmignore similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/.helmignore rename to ChatQnA/benchmark/performance-deprecated/helm_charts/.helmignore diff --git a/ChatQnA/benchmark/performance/helm_charts/Chart.yaml b/ChatQnA/benchmark/performance-deprecated/helm_charts/Chart.yaml similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/Chart.yaml rename to ChatQnA/benchmark/performance-deprecated/helm_charts/Chart.yaml diff --git a/ChatQnA/benchmark/performance/helm_charts/README.md b/ChatQnA/benchmark/performance-deprecated/helm_charts/README.md similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/README.md rename to ChatQnA/benchmark/performance-deprecated/helm_charts/README.md diff --git a/ChatQnA/benchmark/performance/helm_charts/customize.yaml b/ChatQnA/benchmark/performance-deprecated/helm_charts/customize.yaml similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/customize.yaml rename to ChatQnA/benchmark/performance-deprecated/helm_charts/customize.yaml diff --git a/ChatQnA/benchmark/performance/helm_charts/templates/configmap.yaml b/ChatQnA/benchmark/performance-deprecated/helm_charts/templates/configmap.yaml similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/templates/configmap.yaml rename to ChatQnA/benchmark/performance-deprecated/helm_charts/templates/configmap.yaml diff --git a/ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml b/ChatQnA/benchmark/performance-deprecated/helm_charts/templates/deployment.yaml similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/templates/deployment.yaml rename to ChatQnA/benchmark/performance-deprecated/helm_charts/templates/deployment.yaml diff --git a/ChatQnA/benchmark/performance/helm_charts/templates/service.yaml b/ChatQnA/benchmark/performance-deprecated/helm_charts/templates/service.yaml similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/templates/service.yaml rename to ChatQnA/benchmark/performance-deprecated/helm_charts/templates/service.yaml diff --git a/ChatQnA/benchmark/performance/helm_charts/values.yaml b/ChatQnA/benchmark/performance-deprecated/helm_charts/values.yaml similarity index 100% rename from ChatQnA/benchmark/performance/helm_charts/values.yaml rename to ChatQnA/benchmark/performance-deprecated/helm_charts/values.yaml diff --git a/ChatQnA/benchmark/performance/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml index 8e74fe6adf..0f7d6176bb 100644 --- a/ChatQnA/benchmark/performance/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/eight_gaudi/oob_eight_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: 
IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index 95f00644c7..4a5b7b6010 100644 --- a/ChatQnA/benchmark/performance/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index 4fb1657076..9a8ce4a4b5 100644 --- a/ChatQnA/benchmark/performance/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index 0d379f5b5f..c80fc03e33 100644 --- a/ChatQnA/benchmark/performance/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -327,7 +327,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml index 42e9ed4d47..91554a8121 100644 --- a/ChatQnA/benchmark/performance/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/eight_gaudi/oob_eight_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml index 0338a8768b..7b81e252a3 100644 --- a/ChatQnA/benchmark/performance/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml index 9d423ffafc..61346908fc 100644 --- a/ChatQnA/benchmark/performance/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml index f405bcce6c..72ada01914 100644 --- a/ChatQnA/benchmark/performance/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml @@ -237,7 +237,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: 
ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml index 9b47fa0be5..a9d63cb817 100644 --- a/ChatQnA/benchmark/performance/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/eight_gaudi/eight_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -345,7 +345,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 813a8e44c2..7ec356d931 100644 --- a/ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -345,7 +345,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index 32e5bd8843..f64be532a8 100644 --- a/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -345,7 +345,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: 
diff --git a/ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 055f66f0db..ecf8de7b56 100644 --- a/ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: @@ -345,7 +345,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 imagePullPolicy: IfNotPresent name: reranking-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml index 5d6793cd90..365cd5ab5a 100644 --- a/ChatQnA/benchmark/performance/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/eight_gaudi/tuned_eight_gaudi_without_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml index 0cfb09b0f6..6af61b1ffb 100644 --- a/ChatQnA/benchmark/performance/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml @@ -255,7 +255,7 @@ spec: envFrom: - configMapRef: name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent name: llm-dependency-deploy ports: diff --git a/ChatQnA/benchmark/performance/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/performance/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml 
index a9f7e46391..dc56cc96fb 100644
--- a/ChatQnA/benchmark/performance/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml
+++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml
@@ -255,7 +255,7 @@ spec:
       envFrom:
         - configMapRef:
             name: qna-config
-      image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+      image: ghcr.io/huggingface/tgi-gaudi:2.0.6
       imagePullPolicy: IfNotPresent
       name: llm-dependency-deploy
       ports:
diff --git a/ChatQnA/benchmark/performance/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
similarity index 99%
rename from ChatQnA/benchmark/performance/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
rename to ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
index a072d36ff5..f14ebc2154 100644
--- a/ChatQnA/benchmark/performance/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
+++ b/ChatQnA/benchmark/performance-deprecated/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml
@@ -255,7 +255,7 @@ spec:
       envFrom:
         - configMapRef:
             name: qna-config
-      image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+      image: ghcr.io/huggingface/tgi-gaudi:2.0.6
       imagePullPolicy: IfNotPresent
       name: llm-dependency-deploy
       ports:
diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md
new file mode 100644
index 0000000000..d667727f48
--- /dev/null
+++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md
@@ -0,0 +1,204 @@
+# ChatQnA Benchmarking
+
+This folder contains a collection of Kubernetes manifest files for deploying the ChatQnA service across scalable nodes. It includes a comprehensive [benchmarking tool](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md) that enables throughput analysis to assess inference performance.
+
+By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
+
+## Purpose
+
+We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
+
+- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
+- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
+- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks, etc.
+
+## Metrics
+
+The benchmark reports the following metrics:
+
+- Number of Concurrent Requests
+- End-to-End Latency: P50, P90, P99 (in milliseconds)
+- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
+- Average Next Token Latency (in milliseconds)
+- Average Token Latency (in milliseconds)
+- Requests Per Second (RPS)
+- Output Tokens Per Second
+- Input Tokens Per Second
+
+Results will be displayed in the terminal and saved as a CSV file named `1_stats.csv` for easy export to spreadsheets.
+
+## Table of Contents
+
+- [Deployment](#deployment)
+  - [Prerequisites](#prerequisites)
+  - [Deployment Scenarios](#deployment-scenarios)
+    - [Case 1: Baseline Deployment with Rerank](#case-1-baseline-deployment-with-rerank)
+    - [Case 2: Baseline Deployment without Rerank](#case-2-baseline-deployment-without-rerank)
+    - [Case 3: Tuned Deployment with Rerank](#case-3-tuned-deployment-with-rerank)
+- [Benchmark](#benchmark)
+  - [Test Configurations](#test-configurations)
+  - [Test Steps](#test-steps)
+    - [Upload Retrieval File](#upload-retrieval-file)
+    - [Run Benchmark Test](#run-benchmark-test)
+    - [Data collection](#data-collection)
+- [Teardown](#teardown)
+
+## Deployment
+
+### Prerequisites
+
+- Kubernetes installation: Use [kubespray](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md) or other official Kubernetes installation guides.
+- Helm installation: Follow the [Helm documentation](https://helm.sh/docs/intro/install/#helm) to install Helm.
+- Setup Hugging Face Token
+
+  To access models and APIs from Hugging Face, set your token as an environment variable.
+  ```bash
+  export HF_TOKEN="insert-your-huggingface-token-here"
+  ```
+- Prepare Shared Models (Optional but Strongly Recommended)
+
+  Downloading models simultaneously to multiple nodes in your cluster can overload resources such as network bandwidth, memory and storage. To prevent resource exhaustion, it's recommended to preload the models in advance.
+  ```bash
+  pip install -U "huggingface_hub[cli]"
+  sudo mkdir -p /mnt/models
+  sudo chmod 777 /mnt/models
+  huggingface-cli download --cache-dir /mnt/models Intel/neural-chat-7b-v3-3
+  export MODEL_DIR=/mnt/models
+  ```
+  Once the models are downloaded, you can consider the following methods for sharing them across nodes:
+  - Persistent Volume Claim (PVC): This is the recommended approach for production setups. For more details on using PVC, refer to [PVC](https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/README.md#using-persistent-volume).
+  - Local Host Path: For simpler testing, ensure that each node involved in the deployment follows the steps above to locally prepare the models. After preparing the models, use `--set global.modelUseHostPath=${MODEL_DIR}` in the deployment command.
+
+- Add OPEA Helm Repository:
+  ```bash
+  python deploy.py --add-repo
+  ```
+- Label Nodes
+  ```bash
+  python deploy.py --add-label --num-nodes 2
+  ```
+
+### Deployment Scenarios
+
+The examples below are based on a two-node setup. You can adjust the number of nodes by using the `--num-nodes` option.
+
+By default, these commands use the `default` namespace. To specify a different namespace, use the `--namespace` flag with the deploy, uninstall, and kubectl commands, as shown in the sketch below. Additionally, update the `namespace` field in `benchmark.yaml` before running the benchmark test.
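+
+For example, a sketch of a two-node deployment into a dedicated namespace (the `benchmark` namespace name here is purely illustrative) could look like:
+
+```bash
+# Deploy into a custom namespace (illustrative values)
+python deploy.py --hf-token $HF_TOKEN --model-dir $MODEL_DIR --num-nodes 2 --with-rerank --namespace benchmark
+# Uninstall from the same namespace when done
+python deploy.py --uninstall --namespace benchmark
+```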
+
+For additional configuration options, run `python deploy.py --help`
+
+#### Case 1: Baseline Deployment with Rerank
+
+Deploy Command (with node number, Hugging Face token, model directory specified):
+```bash
+python deploy.py --hf-token $HF_TOKEN --model-dir $MODEL_DIR --num-nodes 2 --with-rerank
+```
+Uninstall Command:
+```bash
+python deploy.py --uninstall
+```
+
+#### Case 2: Baseline Deployment without Rerank
+
+```bash
+python deploy.py --hf-token $HF_TOKEN --model-dir $MODEL_DIR --num-nodes 2
+```
+#### Case 3: Tuned Deployment with Rerank
+
+```bash
+python deploy.py --hf-token $HF_TOKEN --model-dir $MODEL_DIR --num-nodes 2 --with-rerank --tuned
+```
+
+## Benchmark
+
+### Test Configurations
+
+| Key      | Value   |
+| -------- | ------- |
+| Workload | ChatQnA |
+| Tag      | V1.1    |
+
+Models configuration
+| Key       | Value                     |
+| --------- | ------------------------- |
+| Embedding | BAAI/bge-base-en-v1.5     |
+| Reranking | BAAI/bge-reranker-base    |
+| Inference | Intel/neural-chat-7b-v3-3 |
+
+Benchmark parameters
+| Key               | Value |
+| ----------------- | ----- |
+| LLM input tokens  | 1024  |
+| LLM output tokens | 128   |
+
+Number of test requests for different scheduled node counts:
+| Node count | Concurrency | Query number |
+| ---------- | ----------- | ------------ |
+| 1          | 128         | 640          |
+| 2          | 256         | 1280         |
+| 4          | 512         | 2560         |
+
+More detailed configuration can be found in the configuration file [benchmark.yaml](./benchmark.yaml).
+
+### Test Steps
+
+Use `kubectl get pods` to confirm that all pods are `READY` before starting the test.
+
+#### Upload Retrieval File
+
+Before testing, upload the specified file to ensure that the LLM input has a token length of 1K.
+
+Get files:
+
+```bash
+wget https://raw.githubusercontent.com/opea-project/GenAIEval/main/evals/benchmark/data/upload_file_no_rerank.txt
+wget https://raw.githubusercontent.com/opea-project/GenAIEval/main/evals/benchmark/data/upload_file.txt
+```
+
+Retrieve the `ClusterIP` of the `chatqna-data-prep` service.
+
+```bash
+kubectl get svc
+```
+Expected output:
+```log
+chatqna-data-prep   ClusterIP   xx.xx.xx.xx   6007/TCP   51m
+```
+
+Use the following `cURL` command to upload the files:
+
+```bash
+cd GenAIEval/evals/benchmark/data
+# RAG with Rerank
+curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
+     -H "Content-Type: multipart/form-data" \
+     -F "files=@./upload_file.txt"
+# RAG without Rerank
+curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
+     -H "Content-Type: multipart/form-data" \
+     -F "files=@./upload_file_no_rerank.txt"
+```
+
+#### Run Benchmark Test
+
+Run the benchmark test using:
+```bash
+bash benchmark.sh -n 2
+```
+The `-n` argument specifies the number of test nodes. Required dependencies will be automatically installed when running the benchmark for the first time.
+
+#### Data collection
+
+All test results will be saved in the folder `GenAIEval/evals/benchmark/benchmark_output`.
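+
+As a quick sanity check (a sketch; the exact layout under the output folder may vary with the GenAIEval version), you can locate the summary CSVs and render them as tables:
+
+```bash
+# Find every summary CSV produced by the run and pretty-print it
+find GenAIEval/evals/benchmark/benchmark_output -name "1_stats.csv" | while read -r f; do
+  echo "== $f =="; column -t -s, "$f"
+done
+```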
+
+## Teardown
+
+After completing the benchmark, use the following commands to clean up the environment:
+
+Remove Node Labels:
+```bash
+python deploy.py --delete-label
+```
+Delete the OPEA Helm Repository:
+```bash
+python deploy.py --delete-repo
+```
diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.sh b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.sh
new file mode 100755
index 0000000000..ba69f4e963
--- /dev/null
+++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+deployment_type="k8s"
+node_number=1
+service_port=8888
+query_per_node=640
+
+benchmark_tool_path="$(pwd)/GenAIEval"
+
+usage() {
+    echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]"
+    echo "  -d deployment_type    ChatQnA deployment type, select between k8s and docker (default: k8s)"
+    echo "  -n node_number        Test node number, required only for k8s deployment_type (default: 1)"
+    echo "  -i service_ip         chatqna service ip, required only for docker deployment_type"
+    echo "  -p service_port       chatqna service port, required only for docker deployment_type (default: 8888)"
+    exit 1
+}
+
+while getopts ":d:n:i:p:" opt; do
+    case ${opt} in
+        d )
+            deployment_type=$OPTARG
+            ;;
+        n )
+            node_number=$OPTARG
+            ;;
+        i )
+            service_ip=$OPTARG
+            ;;
+        p )
+            service_port=$OPTARG
+            ;;
+        \? )
+            echo "Invalid option: -$OPTARG" 1>&2
+            usage
+            ;;
+        : )
+            echo "Invalid option: -$OPTARG requires an argument" 1>&2
+            usage
+            ;;
+    esac
+done
+
+if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then
+    echo "Error: service_ip is required for docker deployment_type" 1>&2
+    usage
+fi
+
+if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then
+    echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
+fi
+
+function main() {
+    if [[ ! -d ${benchmark_tool_path} ]]; then
+        echo "Benchmark tool not found, setting up..."
+        setup_env
+    fi
+    run_benchmark
+}
+
+function setup_env() {
+    git clone https://github.com/opea-project/GenAIEval.git
+    pushd ${benchmark_tool_path}
+    python3 -m venv stress_venv
+    source stress_venv/bin/activate
+    pip install -r requirements.txt
+    popd
+}
+
+function run_benchmark() {
+    source ${benchmark_tool_path}/stress_venv/bin/activate
+    export DEPLOYMENT_TYPE=${deployment_type}
+    export SERVICE_IP=${service_ip:-"None"}
+    export SERVICE_PORT=${service_port:-"None"}
+    if [[ -z $USER_QUERIES ]]; then
+        user_query=$((query_per_node*node_number))
+        export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
+        echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
+    fi
+    export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//')
+    if [[ -z $WARMUP ]]; then export WARMUP=0; fi
+    if [[ -z $TEST_OUTPUT_DIR ]]; then
+        if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
+            export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
+        else
+            export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
+        fi
+        echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
+    fi
+
+    envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml
+    cd ${benchmark_tool_path}/evals/benchmark
+    python benchmark.py
+}
+
+main
diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.yaml b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.yaml
new file mode 100644
index 0000000000..1d4ae4794e
--- /dev/null
+++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.yaml
@@ -0,0 +1,69 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+test_suite_config: # Overall configuration settings for the test suite
+  examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
+  deployment_type: ${DEPLOYMENT_TYPE} # Default is "k8s", can also be "docker"
+  service_ip: ${SERVICE_IP} # Leave as None for k8s, specify for Docker
+  service_port: ${SERVICE_PORT} # Leave as None for k8s, specify for Docker
+  warm_ups: ${WARMUP} # Number of test requests for warm-up
+  run_time: 60m # The max total run time for the test suite
+  seed: # The seed for all RNGs
+  user_queries: ${USER_QUERIES} # Number of test requests at each concurrency level
+  query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default.
+  random_prompt: false # Use random prompts if true, fixed prompts if false
+  collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
+  data_visualization: false # Generate data visualization if true, do not generate data visualization if false
+  llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
+  test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
+  load_shape: # Tenant concurrency pattern
+    name: constant # poisson or constant (locust default load shape)
+    params: # Loadshape-specific parameters
+      constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
+        concurrent_level: 5 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
+        # arrival_rate: 1.0 # Request arrival rate.
If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate + poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson + arrival_rate: 1.0 # Request arrival rate + namespace: "my-chatqna" + +test_cases: + chatqna: + embedding: + run_test: false + service_name: "chatqna-embedding-usvc" # Replace with your service name + embedserve: + run_test: false + service_name: "chatqna-tei" # Replace with your service name + retriever: + run_test: false + service_name: "chatqna-retriever-usvc" # Replace with your service name + parameters: + search_type: "similarity" + k: 4 + fetch_k: 20 + lambda_mult: 0.5 + score_threshold: 0.2 + reranking: + run_test: false + service_name: "chatqna-reranking-usvc" # Replace with your service name + parameters: + top_n: 1 + rerankserve: + run_test: false + service_name: "chatqna-teirerank" # Replace with your service name + llm: + run_test: false + service_name: "chatqna-llm-uservice" # Replace with your service name + parameters: + max_tokens: 128 + temperature: 0.01 + top_k: 10 + top_p: 0.95 + repetition_penalty: 1.03 + streaming: true + llmserve: + run_test: false + service_name: "chatqna-tgi" # Replace with your service name + e2e: + run_test: true + service_name: "chatqna" # Replace with your service name diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py new file mode 100644 index 0000000000..fe8af99e74 --- /dev/null +++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py @@ -0,0 +1,355 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import glob +import json +import os +import shutil +import subprocess +import sys + +import yaml +from generate_helm_values import generate_helm_values + + +def run_kubectl_command(command): + """Run a kubectl command and return the output.""" + try: + result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + return result.stdout + except subprocess.CalledProcessError as e: + print(f"Error running command: {command}\n{e.stderr}") + exit(1) + + +def get_all_nodes(): + """Get the list of all nodes in the Kubernetes cluster.""" + command = ["kubectl", "get", "nodes", "-o", "json"] + output = run_kubectl_command(command) + nodes = json.loads(output) + return [node["metadata"]["name"] for node in nodes["items"]] + + +def add_label_to_node(node_name, label): + """Add a label to the specified node.""" + command = ["kubectl", "label", "node", node_name, label, "--overwrite"] + print(f"Labeling node {node_name} with {label}...") + run_kubectl_command(command) + print(f"Label {label} added to node {node_name} successfully.") + + +def add_labels_to_nodes(node_count=None, label=None, node_names=None): + """Add a label to the specified number of nodes or to specified nodes.""" + + if node_names: + # Add label to the specified nodes + for node_name in node_names: + add_label_to_node(node_name, label) + else: + # Fetch the node list and label the specified number of nodes + all_nodes = get_all_nodes() + if node_count is None or node_count > len(all_nodes): + print(f"Error: Node count exceeds the number of available nodes ({len(all_nodes)} available).") + sys.exit(1) + + selected_nodes = all_nodes[:node_count] + for node_name in selected_nodes: + add_label_to_node(node_name, label) + + +def clear_labels_from_nodes(label, node_names=None): + """Clear the specified label 
from specific nodes if provided, otherwise from all nodes.""" + label_key = label.split("=")[0] # Extract key from 'key=value' format + + # If specific nodes are provided, use them; otherwise, get all nodes + nodes_to_clear = node_names if node_names else get_all_nodes() + + for node_name in nodes_to_clear: + # Check if the node has the label by inspecting its metadata + command = ["kubectl", "get", "node", node_name, "-o", "json"] + node_info = run_kubectl_command(command) + node_metadata = json.loads(node_info) + + # Check if the label exists on this node + labels = node_metadata["metadata"].get("labels", {}) + if label_key in labels: + # Remove the label from the node + command = ["kubectl", "label", "node", node_name, f"{label_key}-"] + print(f"Removing label {label_key} from node {node_name}...") + run_kubectl_command(command) + print(f"Label {label_key} removed from node {node_name} successfully.") + else: + print(f"Label {label_key} not found on node {node_name}, skipping.") + + +def add_helm_repo(repo_name, repo_url): + # Add the repo if it does not exist + add_command = ["helm", "repo", "add", repo_name, repo_url] + try: + subprocess.run(add_command, check=True) + print(f"Added Helm repo {repo_name} from {repo_url}.") + except subprocess.CalledProcessError as e: + print(f"Failed to add Helm repo {repo_name}: {e}") + + +def delete_helm_repo(repo_name): + """Delete Helm repo if it exists.""" + command = ["helm", "repo", "remove", repo_name] + try: + subprocess.run(command, check=True) + print(f"Deleted Helm repo {repo_name}.") + except subprocess.CalledProcessError: + print(f"Failed to delete Helm repo {repo_name}. It may not exist.") + + +def configmap_exists(name, namespace): + """Check if a ConfigMap exists in the specified namespace.""" + check_command = ["kubectl", "get", "configmap", name, "-n", namespace] + result = subprocess.run(check_command, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + return result.returncode == 0 + + +def create_configmap(name, namespace, data): + """Create a ConfigMap if it does not already exist.""" + if configmap_exists(name, namespace): + print(f"ConfigMap '{name}' already exists in namespace '{namespace}', skipping creation.") + else: + create_command = ( + ["kubectl", "create", "configmap", name] + + [f"--from-literal={k}={v}" for k, v in data.items()] + + ["-n", namespace] + ) + print(f"Creating ConfigMap '{name}' in namespace '{namespace}'...") + subprocess.run(create_command, check=True) + print(f"ConfigMap '{name}' created successfully.") + + +def delete_configmap(name, namespace): + """Delete a ConfigMap if it exists.""" + if configmap_exists(name, namespace): + delete_command = ["kubectl", "delete", "configmap", name, "-n", namespace] + print(f"Deleting ConfigMap '{name}'...") + subprocess.run(delete_command, check=True) + print(f"ConfigMap '{name}' deleted successfully.") + else: + print(f"ConfigMap '{name}' does not exist in namespace '{namespace}', skipping deletion.") + + +def install_helm_release(release_name, chart_name, namespace, values_file, device_type): + """Deploy a Helm release with a specified name and chart. + + Parameters: + - release_name: The name of the Helm release. + - chart_name: The Helm chart name or path, e.g., "opea/chatqna". + - namespace: The Kubernetes namespace for deployment. + - values_file: The user values file for deployment. + - device_type: The device type (e.g., "gaudi") for specific configurations (optional). 
+ - extra_env_configmap_name: Name of the ConfigMap for extra environment variables (default "extra-env"). + """ + + # Check if the namespace exists; if not, create it + try: + # Check if the namespace exists + command = ["kubectl", "get", "namespace", namespace] + subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + except subprocess.CalledProcessError: + # Namespace does not exist, create it + print(f"Namespace '{namespace}' does not exist. Creating it...") + command = ["kubectl", "create", "namespace", namespace] + subprocess.run(command, check=True) + print(f"Namespace '{namespace}' created successfully.") + + # This is workaround for teirerank-gaudi, will be removed later + create_configmap("extra-env", namespace, {"MAX_WARMUP_SEQUENCE_LENGTH": "512"}) + + # Handle gaudi-specific values file if device_type is "gaudi" + hw_values_file = None + untar_dir = None + if device_type == "gaudi": + print("Device type is gaudi. Pulling Helm chart to get gaudi-values.yaml...") + + # Pull and untar the chart + subprocess.run(["helm", "pull", chart_name, "--untar"], check=True) + + # Determine the directory name (get the actual chart_name if chart_name is in the format 'repo_name/chart_name', else use chart_name directly) + chart_dir_name = chart_name.split("/")[-1] if "/" in chart_name else chart_name + + # Find the untarred directory (assumes only one directory matches chart_dir_name) + untar_dirs = glob.glob(f"{chart_dir_name}*") + if untar_dirs: + untar_dir = untar_dirs[0] + hw_values_file = os.path.join(untar_dir, "gaudi-values.yaml") + print("gaudi-values.yaml pulled and ready for use.") + else: + print(f"Error: Could not find untarred directory for {chart_name}") + return + + # Prepare the Helm install command + command = ["helm", "install", release_name, chart_name, "--namespace", namespace] + + # Append additional values file for gaudi if it exists + if hw_values_file: + command.extend(["-f", hw_values_file]) + + # Append the main values file + command.extend(["-f", values_file]) + + # Execute the Helm install command + try: + print(f"Running command: {' '.join(command)}") # Print full command for debugging + subprocess.run(command, check=True) + print("Deployment initiated successfully.") + except subprocess.CalledProcessError as e: + print(f"Error occurred while deploying Helm release: {e}") + + # Cleanup: Remove the untarred directory + if untar_dir and os.path.isdir(untar_dir): + print(f"Removing temporary directory: {untar_dir}") + shutil.rmtree(untar_dir) + print("Temporary directory removed successfully.") + + +def uninstall_helm_release(release_name, namespace=None): + """Uninstall a Helm release and clean up resources, optionally delete the namespace if not 'default'.""" + # Default to 'default' namespace if none is specified + if not namespace: + namespace = "default" + + try: + # This is workaround for teirerank-gaudi, will be removed later + delete_configmap("extra-env", namespace) + + # Uninstall the Helm release + command = ["helm", "uninstall", release_name, "--namespace", namespace] + print(f"Uninstalling Helm release {release_name} in namespace {namespace}...") + run_kubectl_command(command) + print(f"Helm release {release_name} uninstalled successfully.") + + # If the namespace is specified and not 'default', delete it + if namespace != "default": + print(f"Deleting namespace {namespace}...") + delete_namespace_command = ["kubectl", "delete", "namespace", namespace] + run_kubectl_command(delete_namespace_command) + print(f"Namespace 
{namespace} deleted successfully.") + else: + print("Namespace is 'default', skipping deletion.") + + except subprocess.CalledProcessError as e: + print(f"Error occurred while uninstalling Helm release or deleting namespace: {e}") + + +def main(): + parser = argparse.ArgumentParser(description="Manage Helm Deployment.") + parser.add_argument( + "--release-name", + type=str, + default="chatqna", + help="The Helm release name created during deployment (default: chatqna).", + ) + parser.add_argument( + "--chart-name", + type=str, + default="opea/chatqna", + help="The chart name to deploy, composed of repo name and chart name (default: opea/chatqna).", + ) + parser.add_argument("--namespace", default="default", help="Kubernetes namespace (default: default).") + parser.add_argument("--hf-token", help="Hugging Face API token.") + parser.add_argument( + "--model-dir", help="Model directory, mounted as volumes for service access to pre-downloaded models" + ) + parser.add_argument("--repo-name", default="opea", help="Helm repo name to add/delete (default: opea).") + parser.add_argument( + "--repo-url", + default="https://opea-project.github.io/GenAIInfra", + help="Helm repository URL (default: https://opea-project.github.io/GenAIInfra).", + ) + parser.add_argument("--user-values", help="Path to a user-specified values.yaml file.") + parser.add_argument( + "--create-values-only", action="store_true", help="Only create the values.yaml file without deploying." + ) + parser.add_argument("--uninstall", action="store_true", help="Uninstall the Helm release.") + parser.add_argument("--num-nodes", type=int, default=1, help="Number of nodes to use (default: 1).") + parser.add_argument("--node-names", nargs="*", help="Optional specific node names to label.") + parser.add_argument("--add-label", action="store_true", help="Add label to specified nodes if this flag is set.") + parser.add_argument( + "--delete-label", action="store_true", help="Delete label from specified nodes if this flag is set." + ) + parser.add_argument( + "--label", default="node-type=opea-benchmark", help="Label to add/delete (default: node-type=opea-benchmark)." 
+    )
+    parser.add_argument("--with-rerank", action="store_true", help="Include rerank service in the deployment.")
+    parser.add_argument(
+        "--tuned",
+        action="store_true",
+        help="Modify resources for services and change extraCmdArgs when creating values.yaml.",
+    )
+    parser.add_argument("--add-repo", action="store_true", help="Add the Helm repo specified by --repo-url.")
+    parser.add_argument("--delete-repo", action="store_true", help="Delete the Helm repo specified by --repo-name.")
+    parser.add_argument(
+        "--device-type",
+        type=str,
+        choices=["cpu", "gaudi"],
+        default="gaudi",
+        help="Specify the device type for deployment (choices: 'cpu', 'gaudi'; default: gaudi).",
+    )
+
+    args = parser.parse_args()
+
+    # Adjust num-nodes based on node-names if specified
+    if args.node_names:
+        num_node_names = len(args.node_names)
+        if args.num_nodes != 1 and args.num_nodes != num_node_names:
+            parser.error("--num-nodes must match the number of --node-names if both are specified.")
+        else:
+            args.num_nodes = num_node_names
+
+    # Helm repository management
+    if args.add_repo:
+        add_helm_repo(args.repo_name, args.repo_url)
+        return
+    elif args.delete_repo:
+        delete_helm_repo(args.repo_name)
+        return
+
+    # Node labeling management
+    if args.add_label:
+        add_labels_to_nodes(args.num_nodes, args.label, args.node_names)
+        return
+    elif args.delete_label:
+        clear_labels_from_nodes(args.label, args.node_names)
+        return
+
+    # Uninstall Helm release if specified
+    if args.uninstall:
+        uninstall_helm_release(args.release_name, args.namespace)
+        return
+
+    # Prepare values.yaml if not uninstalling
+    if args.user_values:
+        values_file_path = args.user_values
+    else:
+        if not args.hf_token:
+            parser.error("--hf-token is required")
+        node_selector = {args.label.split("=")[0]: args.label.split("=")[1]}
+        values_file_path = generate_helm_values(
+            with_rerank=args.with_rerank,
+            num_nodes=args.num_nodes,
+            hf_token=args.hf_token,
+            model_dir=args.model_dir,
+            node_selector=node_selector,
+            tune=args.tuned,
+        )
+
+    # Read back the generated YAML file for verification
+    with open(values_file_path, "r") as file:
+        print("Generated YAML contents:")
+        print(file.read())
+
+    # Deploy unless --create-values-only is specified
+    if not args.create_values_only:
+        install_helm_release(args.release_name, args.chart_name, args.namespace, values_file_path, args.device_type)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py
new file mode 100644
index 0000000000..b288818009
--- /dev/null
+++ b/ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py
@@ -0,0 +1,167 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+import yaml
+
+
+def generate_helm_values(with_rerank, num_nodes, hf_token, model_dir, node_selector=None, tune=False):
+    """Create a values.yaml file based on the provided configuration."""
+
+    # Log the received parameters
+    print("Received parameters:")
+    print(f"with_rerank: {with_rerank}")
+    print(f"num_nodes: {num_nodes}")
+    print(f"node_selector: {node_selector}")  # Log the node_selector
+    print(f"tune: {tune}")
+
+    if node_selector is None:
+        node_selector = {}
+
+    # Construct the base values dictionary
+    values = {
+        "tei": {"nodeSelector": {key: value for key, value in node_selector.items()}},
+        "tgi": {"nodeSelector": {key: value for key, value in node_selector.items()}},
+
"data-prep": {"nodeSelector": {key: value for key, value in node_selector.items()}}, + "redis-vector-db": {"nodeSelector": {key: value for key, value in node_selector.items()}}, + "retriever-usvc": {"nodeSelector": {key: value for key, value in node_selector.items()}}, + "llm-uservice": {"nodeSelector": {key: value for key, value in node_selector.items()}}, + "embedding-usvc": {"nodeSelector": {key: value for key, value in node_selector.items()}}, + "chatqna-ui": {"nodeSelector": {key: value for key, value in node_selector.items()}}, + "global": { + "HUGGINGFACEHUB_API_TOKEN": hf_token, # Use passed token + "modelUseHostPath": model_dir, # Use passed model directory + "extraEnvConfig": "extra-env", # Added MAX_WARMUP_SEQUENCE_LENGTH: 512 to extra-env in deploy.py + }, + "nodeSelector": {key: value for key, value in node_selector.items()}, + } + + if with_rerank: + values["teirerank"] = {"nodeSelector": {key: value for key, value in node_selector.items()}} + values["reranking-usvc"] = {"nodeSelector": {key: value for key, value in node_selector.items()}} + else: + values["image"] = {"repository": "opea/chatqna-without-rerank"} + + default_replicas = [ + {"name": "chatqna", "replicaCount": 2}, + {"name": "tei", "replicaCount": 1}, + {"name": "teirerank", "replicaCount": 1} if with_rerank else None, + {"name": "tgi", "replicaCount": 7 if with_rerank else 8}, + {"name": "data-prep", "replicaCount": 1}, + {"name": "redis-vector-db", "replicaCount": 1}, + {"name": "retriever-usvc", "replicaCount": 2}, + ] + + if num_nodes > 1: + # Scale replicas based on number of nodes + replicas = [ + {"name": "chatqna", "replicaCount": 1 * num_nodes}, + {"name": "tei", "replicaCount": 1 * num_nodes}, + {"name": "teirerank", "replicaCount": 1} if with_rerank else None, + {"name": "tgi", "replicaCount": (8 * num_nodes - 1) if with_rerank else 8 * num_nodes}, + {"name": "data-prep", "replicaCount": 1}, + {"name": "redis-vector-db", "replicaCount": 1}, + {"name": "retriever-usvc", "replicaCount": 1 * num_nodes}, + ] + else: + replicas = default_replicas + + # Remove None values for rerank disabled + replicas = [r for r in replicas if r] + + # Update values.yaml with replicas + for replica in replicas: + service_name = replica["name"] + if service_name == "chatqna": + values["replicaCount"] = replica["replicaCount"] + print(replica["replicaCount"]) + elif service_name in values: + values[service_name]["replicaCount"] = replica["replicaCount"] + + # Prepare resource configurations based on tuning + resources = [] + if tune: + resources = [ + { + "name": "chatqna", + "resources": { + "limits": {"cpu": "16", "memory": "8000Mi"}, + "requests": {"cpu": "16", "memory": "8000Mi"}, + }, + }, + { + "name": "tei", + "resources": { + "limits": {"cpu": "80", "memory": "20000Mi"}, + "requests": {"cpu": "80", "memory": "20000Mi"}, + }, + }, + {"name": "teirerank", "resources": {"limits": {"habana.ai/gaudi": 1}}} if with_rerank else None, + {"name": "tgi", "resources": {"limits": {"habana.ai/gaudi": 1}}}, + {"name": "retriever-usvc", "resources": {"requests": {"cpu": "8", "memory": "8000Mi"}}}, + ] + + # Filter out any None values directly as part of initialization + resources = [r for r in resources if r is not None] + + # Add resources for each service if tuning + for resource in resources: + service_name = resource["name"] + if service_name == "chatqna": + values["resources"] = resource["resources"] + elif service_name in values: + values[service_name]["resources"] = resource["resources"] + + # Add extraCmdArgs for tgi 
service with default values + if "tgi" in values: + values["tgi"]["extraCmdArgs"] = [ + "--max-input-length", + "1280", + "--max-total-tokens", + "2048", + "--max-batch-total-tokens", + "65536", + "--max-batch-prefill-tokens", + "4096", + ] + + yaml_string = yaml.dump(values, default_flow_style=False) + + # Determine the mode based on the 'tune' parameter + mode = "tuned" if tune else "oob" + + # Determine the filename based on 'with_rerank' and 'num_nodes' + if with_rerank: + filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml" + else: + filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml" + + # Write the YAML data to the file + with open(filename, "w") as file: + file.write(yaml_string) + + # Get the current working directory and construct the file path + current_dir = os.getcwd() + filepath = os.path.join(current_dir, filename) + + print(f"YAML file {filepath} has been generated.") + return filepath # Optionally return the file path + + +# Main execution for standalone use of create_values_yaml +if __name__ == "__main__": + # Example values for standalone execution + with_rerank = True + num_nodes = 2 + hftoken = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + modeldir = "/mnt/model" + node_selector = {"node-type": "opea-benchmark"} + tune = True + + filename = generate_helm_values(with_rerank, num_nodes, hftoken, modeldir, node_selector, tune) + + # Read back the generated YAML file for verification + with open(filename, "r") as file: + print("Generated YAML contents:") + print(file.read()) diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index d168d1055f..95318e9613 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -148,6 +148,8 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di next_data["inputs"] = prompt + elif self.services[cur_node].service_type == ServiceType.LLM and not llm_parameters_dict["streaming"]: + next_data["text"] = data["choices"][0]["message"]["content"] else: next_data = data @@ -166,7 +168,10 @@ def align_generator(self, gen, **kwargs): try: # sometimes yield empty chunk, do a fallback here json_data = json.loads(json_str) - if json_data["choices"][0]["finish_reason"] != "eos_token": + if ( + json_data["choices"][0]["finish_reason"] != "eos_token" + and "content" in json_data["choices"][0]["delta"] + ): yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n" except Exception as e: yield f"data: {repr(json_str.encode('utf-8'))}\n\n" diff --git a/ChatQnA/chatqna.yaml b/ChatQnA/chatqna.yaml index e8a2d27357..0344b28317 100644 --- a/ChatQnA/chatqna.yaml +++ b/ChatQnA/chatqna.yaml @@ -19,7 +19,7 @@ opea_micro_services: tei-embedding-service: host: ${TEI_EMBEDDING_SERVICE_IP} ports: ${TEI_EMBEDDING_SERVICE_PORT} - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 volumes: - "./data:/data" runtime: habana @@ -38,7 +38,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/ChatQnA/chatqna_wrapper.py b/ChatQnA/chatqna_wrapper.py new file mode 100644 index 0000000000..09062b5d27 --- /dev/null +++ b/ChatQnA/chatqna_wrapper.py @@ -0,0 +1,68 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0") 
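+# Note: the defaults below assume every dependent microservice runs locally; in a
+# compose or Kubernetes deployment, each *_SERVICE_HOST_IP / *_SERVICE_PORT is
+# expected to be overridden through the environment.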
+MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) +EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0") +EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000)) +RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0") +RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000)) +RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0") +RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000)) +LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") +LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) + + +class ChatQnAService: + def __init__(self, host="0.0.0.0", port=8000): + self.host = host + self.port = port + self.megaservice = ServiceOrchestrator() + + def add_remote_service(self): + embedding = MicroService( + name="embedding", + host=EMBEDDING_SERVICE_HOST_IP, + port=EMBEDDING_SERVICE_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + retriever = MicroService( + name="retriever", + host=RETRIEVER_SERVICE_HOST_IP, + port=RETRIEVER_SERVICE_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + rerank = MicroService( + name="rerank", + host=RERANK_SERVICE_HOST_IP, + port=RERANK_SERVICE_PORT, + endpoint="/v1/reranking", + use_remote_service=True, + service_type=ServiceType.RERANK, + ) + llm = MicroService( + name="llm", + host=LLM_SERVICE_HOST_IP, + port=LLM_SERVICE_PORT, + endpoint="/v1/chat/completions", + use_remote_service=True, + service_type=ServiceType.LLM, + ) + self.megaservice.add(embedding).add(retriever).add(rerank).add(llm) + self.megaservice.flow_to(embedding, retriever) + self.megaservice.flow_to(retriever, rerank) + self.megaservice.flow_to(rerank, llm) + self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) + + +if __name__ == "__main__": + chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) + chatqna.add_remote_service() diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md index 4598c07ec0..8396df454f 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md @@ -26,7 +26,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service ``` @@ -48,13 +47,13 @@ docker pull opea/chatqna:latest docker pull opea/chatqna-ui:latest ``` -In following cases, you could build docker image from source by yourself. - -- Failed to download the docker image. +NB: You should build docker image from source by yourself if: -- If you want to use a specific version of Docker image. +- You are developing off the git main branch (as the container's ports in the repo may be different from the published docker image). +- You can't download the docker image. +- You want to use a specific version of Docker image. -Please refer to 'Build Docker Images' in below. +Please refer to ['Build Docker Images'](#🚀-build-docker-images) in below. 
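+
+As a quick check (a sketch, assuming the default `latest` tag), you can confirm the images exist locally before starting the services:
+
+```bash
+# List the locally available ChatQnA images
+docker images --filter reference="opea/chatqna*"
+```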
## QuickStart: 3. Consume the ChatQnA Service
@@ -195,7 +194,7 @@ For users in China who are unable to download models directly from Huggingface,
   export HF_TOKEN=${your_hf_token}
   export HF_ENDPOINT="https://hf-mirror.com"
   model_name="Intel/neural-chat-7b-v3-3"
-   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
+   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
   ```

2. Offline
@@ -209,7 +208,7 @@ For users in China who are unable to download models directly from Huggingface,
   ```bash
   export HF_TOKEN=${your_hf_token}
   export model_path="/path/to/model"
-   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
+   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
   ```

### Setup Environment Variables
@@ -324,17 +323,17 @@ For details on how to verify the correctness of the response, refer to [how-to-v
   ```bash
   # TGI service
-   curl http://${host_ip}:9009/generate \
+   curl http://${host_ip}:9009/v1/chat/completions \
     -X POST \
-     -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+     -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
     -H 'Content-Type: application/json'
   ```

   ```bash
   # vLLM Service
-   curl http://${host_ip}:9009/v1/completions \
+   curl http://${host_ip}:9009/v1/chat/completions \
     -H "Content-Type: application/json" \
-     -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
+     -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
   ```

5. MegaService
@@ -433,6 +432,66 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
   -H "Content-Type: application/json"
```

+### Profile Microservices
+
+To further analyze microservice performance, users can follow the instructions below to profile the microservices.
+
+#### 1. vLLM backend Service
+
+Users can follow the previous section to test the vLLM microservice or the ChatQnA MegaService.
+ By default, vLLM profiling is not enabled. Users can start and stop profiling with the following commands.
+
+##### Start vLLM profiling
+
+```bash
+curl http://${host_ip}:9009/start_profile \
+  -H "Content-Type: application/json" \
+  -d '{"model": "Intel/neural-chat-7b-v3-3"}'
+```
+
+Users should see the docker logs below from vllm-service if profiling started correctly.
+
+```bash
+INFO api_server.py:361] Starting profiler...
+INFO api_server.py:363] Profiler started.
+INFO:     x.x.x.x:35940 - "POST /start_profile HTTP/1.1" 200 OK
+```
+
+After vLLM profiling is started, users can start asking questions and getting responses from the vLLM microservice
+ or the ChatQnA MegaService.
+
+##### Stop vLLM profiling
+
+With the following command, users can stop vLLM profiling and generate a \*.pt.trace.json.gz file as the profiling result
+ under the /mnt folder in the vllm-service docker instance.
+
+```bash
+# vLLM Service
+curl http://${host_ip}:9009/stop_profile \
+  -H "Content-Type: application/json" \
+  -d '{"model": "Intel/neural-chat-7b-v3-3"}'
+```
+
+Users should see the docker logs below from vllm-service if profiling stopped correctly.
+
+```bash
+INFO api_server.py:368] Stopping profiler...
+INFO api_server.py:370] Profiler stopped.
+INFO:     x.x.x.x:41614 - "POST /stop_profile HTTP/1.1" 200 OK
+```
+
+After vLLM profiling is stopped, users can use the command below to copy the \*.pt.trace.json.gz file out of the /mnt folder.
+
+```bash
+docker cp vllm-service:/mnt/ .
+```
+
+##### Check profiling result
+
+Open a web browser, go to "chrome://tracing" or "ui.perfetto.dev", and then load the json.gz file; you should be able
+ to see the vLLM profiling result as in the diagram below.
+![image](https://github.com/user-attachments/assets/55c7097e-5574-41dc-97a7-5e87c31bc286)
+
## 🚀 Launch the UI

### Launch with origin port
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md
new file mode 100644
index 0000000000..f730a91aea
--- /dev/null
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_pinecone.md
@@ -0,0 +1,382 @@
+# Build Mega Service of ChatQnA (with Pinecone) on Xeon
+
+This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
+
+## 🚀 Apply Xeon Server on AWS
+
+To apply a Xeon server on AWS, start by creating an AWS account if you don't have one already. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage the power of 4th Generation Intel Xeon Scalable processors. These instances are optimized for high-performance computing and demanding workloads.
+
+For detailed information about these instance types, you can refer to this [link](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options.
+
+After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed.
+
+**Certain ports in the EC2 instance need to be opened up in the security group for the microservices to work with the curl commands**
+
+> See one example below. Please open up these ports in the EC2 instance based on the IP addresses you want to allow
+
+```
+
+data_prep_service
+=====================
+Port 6007 - Open to 0.0.0.0/0
+Port 6008 - Open to 0.0.0.0/0
+
+tei_embedding_service
+=====================
+Port 6006 - Open to 0.0.0.0/0
+
+embedding
+=========
+Port 6000 - Open to 0.0.0.0/0
+
+retriever
+=========
+Port 7000 - Open to 0.0.0.0/0
+
+tei_xeon_service
+================
+Port 8808 - Open to 0.0.0.0/0
+
+reranking
+=========
+Port 8000 - Open to 0.0.0.0/0
+
+tgi-service
+===========
+Port 9009 - Open to 0.0.0.0/0
+
+llm
+===
+Port 9000 - Open to 0.0.0.0/0
+
+chaqna-xeon-backend-server
+==========================
+Port 8888 - Open to 0.0.0.0/0
+
+chaqna-xeon-ui-server
+=====================
+Port 5173 - Open to 0.0.0.0/0
+```
+
+## 🚀 Build Docker Images
+
+First of all, you need to build the Docker images locally.
+
+```bash
+git clone https://github.com/opea-project/GenAIComps.git
+cd GenAIComps
+```
+
+### 1. Build Embedding Image
+
+```bash
+docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
+```
+
+### 2. Build Retriever Image
+
+```bash
+docker build --no-cache -t opea/retriever-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/pinecone/langchain/Dockerfile .
+```
+
+### 3. Build Rerank Image
+
+```bash
+docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
+```
+
+### 4. Build LLM Image
+
+```bash
+docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
+```
+
+### 5. Build Dataprep Image
+
+```bash
+docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/langchain/Dockerfile .
+cd ..
+```
+
+### 6. Build MegaService Docker Image
+
+To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image via the command below:
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/ChatQnA/docker
+docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+cd ../../..
+```
+
+### 7. Build UI Docker Image
+
+Build the frontend Docker image via the command below:
+
+```bash
+cd GenAIExamples/ChatQnA/docker/ui/
+docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
+cd ../../../..
+```
+
+### 8. Build Conversational React UI Docker Image (Optional)
+
+Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice via the command below:
+
+**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
+
+```bash
+cd GenAIExamples/ChatQnA/docker/ui/
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
+docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT --build-arg DATAPREP_GET_FILE_ENDPOINT=$DATAPREP_GET_FILE_ENDPOINT -f ./docker/Dockerfile.react .
+cd ../../../..
+```
+
+Then run the command `docker images`; you should see the following 7 Docker images:
+
+1. `opea/dataprep-pinecone:latest`
+2. `opea/embedding-tei:latest`
+3. `opea/retriever-pinecone:latest`
+4. `opea/reranking-tei:latest`
+5. `opea/llm-tgi:latest`
+6. `opea/chatqna:latest`
+7. `opea/chatqna-ui:latest`
+
+## 🚀 Start Microservices
+
+### Setup Environment Variables
+
+Since the `compose_pinecone.yaml` will consume some environment variables, you need to set them up in advance as below.
+
+**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
+
+> Replace External_Public_IP below with the actual IPv4 value
+
+```
+export host_ip="External_Public_IP"
+```
+
+**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
+
+> Replace Your_Huggingface_API_Token below with your actual Huggingface API Token value
+
+```
+export your_hf_api_token="Your_Huggingface_API_Token"
+```
+
+**Append the value of the public IP address to the no_proxy list**
+
+```
+export your_no_proxy=${your_no_proxy},"External_Public_IP"
+```
+
+**Get the PINECONE_API_KEY and the INDEX_NAME**
+
+```
+export pinecone_api_key=${api_key}
+export pinecone_index_name=${pinecone_index}
+```
+
+```bash
+export no_proxy=${your_no_proxy}
+export http_proxy=${your_http_proxy}
+export https_proxy=${your_http_proxy}
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export RERANK_MODEL_ID="BAAI/bge-reranker-base"
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
+export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
+export PINECONE_API_KEY=${pinecone_api_key}
+export PINECONE_INDEX_NAME=${pinecone_index_name}
+export INDEX_NAME=${pinecone_index_name}
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP=${host_ip}
+export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
+```
+
+Note: Please replace `host_ip` with your external IP address; do not use localhost.
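+
+Before starting the containers, a small guard like the following (a sketch; extend the variable list to whatever your setup requires) can catch unset variables early:
+
+```bash
+# Fail fast if any required variable is missing
+for v in host_ip HUGGINGFACEHUB_API_TOKEN PINECONE_API_KEY PINECONE_INDEX_NAME; do
+  [[ -z "${!v}" ]] && { echo "ERROR: $v is not set"; exit 1; }
+done
+echo "All required variables are set."
+```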
+
+### Start all the services Docker Containers
+
+> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file
+
+```bash
+cd GenAIExamples/ChatQnA/docker/xeon/
+docker compose -f compose_pinecone.yaml up -d
+```
+
+### Validate Microservices
+
+1. TEI Embedding Service
+
+```bash
+curl ${host_ip}:6006/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```
+
+2. Embedding Microservice
+
+```bash
+curl http://${host_ip}:6000/v1/embeddings\
+  -X POST \
+  -d '{"text":"hello"}' \
+  -H 'Content-Type: application/json'
+```
+
+3. Retriever Microservice
+   To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:
+
+```Python
+import random
+embedding = [random.uniform(-1, 1) for _ in range(768)]
+print(embedding)
+```
+
+Then substitute your mock embedding vector for the `${your_embedding}` in the following cURL command:
+
+```bash
+curl http://${host_ip}:7000/v1/retrieval \
+  -X POST \
+  -d '{"text":"What is the revenue of Nike in 2023?","embedding":"'"${your_embedding}"'"}' \
+  -H 'Content-Type: application/json'
+```
+
+4. TEI Reranking Service
+
+```bash
+curl http://${host_ip}:8808/rerank \
+    -X POST \
+    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
+    -H 'Content-Type: application/json'
+```
+
+5. Reranking Microservice
+
+```bash
+curl http://${host_ip}:8000/v1/reranking\
+  -X POST \
+  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
+  -H 'Content-Type: application/json'
+```
+
+6. TGI Service
+
+```bash
+curl http://${host_ip}:9009/generate \
+  -X POST \
+  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -H 'Content-Type: application/json'
+```
+
+7. LLM Microservice
+
+```bash
+curl http://${host_ip}:9000/v1/chat/completions\
+  -X POST \
+  -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+  -H 'Content-Type: application/json'
+```
+
+8. MegaService
+
+```bash
+curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
+     "messages": "What is the revenue of Nike in 2023?"
+     }'
+```
+
+9. Dataprep Microservice (Optional)
+
+If you want to update the default knowledge base, you can use the following commands:
+
+Update Knowledge Base via Local File Upload:
+
+```bash
+curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+     -H "Content-Type: multipart/form-data" \
+     -F "files=@./nke-10k-2023.pdf"
+```
+
+This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
+
+Add Knowledge Base via HTTP Links:
+
+```bash
+curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+     -H "Content-Type: multipart/form-data" \
+     -F 'link_list=["https://opea.dev"]'
+```
+
+This command updates a knowledge base by submitting a list of HTTP links for processing.
+
+Also, you are able to get the file list that you uploaded:
+
+```bash
+curl -X POST "http://${host_ip}:6008/v1/dataprep/get_file" \
+     -H "Content-Type: application/json"
+```
+
+## Enable LangSmith to Monitor the Application (Optional)
+
+LangSmith offers tools to debug, evaluate, and monitor language models and intelligent agents. It can be used to assess benchmark data for each microservice.
Before launching your services with `docker compose -f compose_pinecone.yaml up -d`, you need to enable LangSmith tracing by setting the `LANGCHAIN_TRACING_V2` environment variable to true and configuring your LangChain API key. + +Here's how you can do it: + +1. Install the latest version of LangSmith: + +```bash +pip install -U langsmith +``` + +2. Set the necessary environment variables: + +```bash +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY=ls_... +``` + +## 🚀 Launch the UI + +To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose_pinecone.yaml` file as shown below: + +```yaml + chatqna-xeon-ui-server: + image: opea/chatqna-ui:latest + ... + ports: + - "80:5173" +``` + +## 🚀 Launch the Conversational UI (React) + +To access the Conversational UI frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose_pinecone.yaml` file as shown below: + +```yaml + chatqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + ... + ports: + - "80:80" +``` + +![project-screenshot](../../../../assets/img/chat_ui_init.png) + +Here is an example of running ChatQnA: + +![project-screenshot](../../../../assets/img/chat_ui_response.png) + +Here is an example of running ChatQnA with Conversational UI (React): + +![project-screenshot](../../../../assets/img/conversation_ui_response.png) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md index 1adfe8cf17..2f9fa1b822 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md @@ -252,9 +252,9 @@ For details on how to verify the correctness of the response, refer to [how-to-v Then try the `cURL` command below to validate TGI.
```bash - curl http://${host_ip}:6042/generate \ + curl http://${host_ip}:6042/v1/chat/completions \ -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ -H 'Content-Type: application/json' ``` diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml index 14794e8d4f..0c290b8683 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -73,7 +73,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml similarity index 52% rename from ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml rename to ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index a1019c9ac1..f42fd6fd2d 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -1,142 +1,121 @@ + # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +version: "3.8" + services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} - container_name: dataprep-redis-server + dataprep-pinecone-service: + image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} + container_name: dataprep-pinecone-server depends_on: - - redis-vector-db - tei-embedding-service ports: - "6007:6007" + - "6008:6008" + - "6009:6009" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - REDIS_URL: redis://redis-vector-db:6379 - REDIS_HOST: redis-vector-db - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: http://tei-embedding-service:80 + PINECONE_API_KEY: ${PINECONE_API_KEY} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest - container_name: tei-embedding-gaudi-server + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-server ports: - - "8090:80" + - "6006:80" volumes: - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate retriever: - image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} - container_name: retriever-redis-server - depends_on: - - redis-vector-db + image: ${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest} + container_name: retriever-pinecone-server ports: - "7000:7000" ipc: host environment: - no_proxy: ${no_proxy} http_proxy: ${http_proxy} 
https_proxy: ${https_proxy} - REDIS_URL: redis://redis-vector-db:6379 - REDIS_HOST: redis-vector-db - INDEX_NAME: ${INDEX_NAME} + PINECONE_API_KEY: ${PINECONE_API_KEY} + INDEX_NAME: ${PINECONE_INDEX_NAME} + PINECONE_INDEX_NAME: ${PINECONE_INDEX_NAME} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/tei-gaudi:latest - container_name: tei-reranking-gaudi-server + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-reranking-server ports: - "8808:80" volumes: - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate - vllm-ray-service: - image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest} - container_name: vllm-ray-gaudi-server + tgi-service: + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + container_name: tgi-service ports: - - "8006:8000" + - "9009:80" volumes: - "./data:/data" + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL_ID: ${LLM_MODEL_ID} - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True" - chatqna-gaudi-backend-server: + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + chatqna-xeon-backend-server: image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} - container_name: chatqna-gaudi-backend-server + container_name: chatqna-xeon-backend-server depends_on: - - redis-vector-db - tei-embedding-service + - dataprep-pinecone-service - retriever - tei-reranking-service - - vllm-ray-service + - tgi-service ports: - "8888:8888" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server + - MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server - EMBEDDING_SERVER_HOST_IP=tei-embedding-service - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} - RETRIEVER_SERVICE_HOST_IP=retriever - RERANK_SERVER_HOST_IP=tei-reranking-service - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} - - LLM_SERVER_HOST_IP=vllm-ray-service - - LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000} - - LLM_MODEL=${LLM_MODEL_ID} + - LLM_SERVER_HOST_IP=tgi-service + - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80} - LOGFLAG=${LOGFLAG} + - LLM_MODEL=${LLM_MODEL_ID} ipc: host restart: always - chatqna-gaudi-ui-server: + chatqna-xeon-ui-server: image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} - container_name: chatqna-gaudi-ui-server + container_name: chatqna-xeon-ui-server depends_on: - - chatqna-gaudi-backend-server + - chatqna-xeon-backend-server ports: - "5173:5173" environment: @@ -145,24 +124,24 @@ services: - http_proxy=${http_proxy} ipc: host restart: always - 
chatqna-gaudi-nginx-server: + chatqna-xeon-nginx-server: image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: chatqna-gaudi-nginx-server + container_name: chatqna-xeon-nginx-server depends_on: - - chatqna-gaudi-backend-server - - chatqna-gaudi-ui-server + - chatqna-xeon-backend-server + - chatqna-xeon-ui-server ports: - "${NGINX_PORT:-80}:80" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server + - FRONTEND_SERVICE_IP=chatqna-xeon-ui-server - FRONTEND_SERVICE_PORT=5173 - BACKEND_SERVICE_NAME=chatqna - - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server + - BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_PORT=8888 - - DATAPREP_SERVICE_IP=dataprep-redis-service + - DATAPREP_SERVICE_IP=dataprep-pinecone-service - DATAPREP_SERVICE_PORT=6007 ipc: host restart: always diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml index 8d37bb83af..ad7df8fa79 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml @@ -72,7 +72,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "6042:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_vllm.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_vllm.yaml index 6e9d9ac200..3735b75f04 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_vllm.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_vllm.yaml @@ -86,6 +86,7 @@ services: https_proxy: ${https_proxy} HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 chatqna-xeon-backend-server: image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml index e497985f8a..938a6690d3 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml @@ -57,7 +57,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh b/ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh old mode 100644 new mode 100755 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index 43aa720f02..ad56d525a4 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -26,7 +26,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export 
no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails + export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails ``` 3. Set up other environment variables: @@ -103,7 +103,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy ```bash git clone https://github.com/opea-project/GenAIExamples.git - cd GenAIExamples/ChatQnA/docker + cd GenAIExamples/ChatQnA docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . ``` @@ -123,7 +123,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy ```bash git clone https://github.com/opea-project/GenAIExamples.git - cd GenAIExamples/ChatQnA/docker + cd GenAIExamples/ChatQnA docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank . ``` @@ -192,7 +192,7 @@ For users in China who are unable to download models directly from Huggingface, export HF_TOKEN=${your_hf_token} export HF_ENDPOINT="https://hf-mirror.com" model_name="Intel/neural-chat-7b-v3-3" - docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048 + docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048 ``` 2. 
Offline @@ -206,7 +206,7 @@ For users in China who are unable to download models directly from Huggingface, ```bash export HF_TOKEN=${your_hf_token} export model_path="/path/to/model" - docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.5 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048 + docker run -p 8008:80 -v $model_path:/data --name tgi_service --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048 ``` ### Setup Environment Variables @@ -227,7 +227,7 @@ For users in China who are unable to download models directly from Huggingface, export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails + export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails ``` 3. Set up other environment variables: @@ -257,12 +257,6 @@ If use vllm for llm backend. docker compose -f compose_vllm.yaml up -d ``` -If use vllm-on-ray for llm backend. - -```bash -docker compose -f compose_vllm_ray.yaml up -d -``` - If you want to enable guardrails microservice in the pipeline, please follow the below command instead: ```bash @@ -332,30 +326,18 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid Then try the `cURL` command below to validate services. ```bash - #TGI Service - curl http://${host_ip}:8005/generate \ + # TGI service + curl http://${host_ip}:9009/v1/chat/completions \ -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ -H 'Content-Type: application/json' ``` ```bash - #vLLM Service - curl http://${host_ip}:8007/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "${LLM_MODEL_ID}", - "prompt": "What is Deep Learning?", - "max_tokens": 32, - "temperature": 0 - }' - ``` - - ```bash - #vLLM-on-Ray Service - curl http://${host_ip}:8006/v1/chat/completions \ + # vLLM Service + curl http://${host_ip}:9009/v1/chat/completions \ -H "Content-Type: application/json" \ - -d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' + -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' ``` 5.
MegaService diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index e34f072b5c..170ab54353 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -26,25 +26,17 @@ services: TEI_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-gaudi-server ports: - "8090:80" volumes: - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} @@ -65,7 +57,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-reranking-gaudi-server ports: - "8808:80" @@ -86,7 +78,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8005:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 7b9d391fea..7bebade290 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -26,7 +26,7 @@ services: TEI_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tgi-guardrails-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-guardrails-server ports: - "8088:80" @@ -65,25 +65,17 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-gaudi-server ports: - "8090:80" volumes: - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} @@ -104,7 +96,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-reranking-gaudi-server ports: - "8808:80" @@ -125,7 +117,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 
container_name: tgi-gaudi-server ports: - "8008:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml index cd6ef69ec5..bfbbb9570b 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml @@ -26,25 +26,17 @@ services: TEI_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-gaudi-server ports: - "8090:80" volumes: - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} @@ -65,7 +57,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-reranking-gaudi-server ports: - "8808:80" @@ -86,7 +78,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest} + image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest} container_name: vllm-gaudi-server ports: - "8007:80" @@ -104,7 +96,7 @@ services: cap_add: - SYS_NICE ipc: host - command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048" + command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 chatqna-gaudi-backend-server: image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} container_name: chatqna-gaudi-backend-server diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index 7c2323157f..524b44c1a0 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -26,25 +26,17 @@ services: TEI_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-gaudi-server ports: - "8090:80" volumes: - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host + shm_size: 1g environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} @@ -65,7 +57,7 @@ services: HUGGINGFACEHUB_API_TOKEN: 
${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8005:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md index 7448ae625c..d9684e9dbd 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md @@ -48,16 +48,16 @@ f810f3b4d329 opea/embedding-tei:latest "python e 2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server 69e1fb59e92c opea/retriever-redis:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server 313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server -05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server -174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:latest "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server +174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server +05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server 74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db 88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server ``` -In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.5` Existed. +In this case, `ghcr.io/huggingface/tgi-gaudi:2.0.6` exited. ``` -05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server +05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server ``` Next we can check the container logs to get to know what happened during the docker start. @@ -68,7 +68,7 @@ Check the log of container by: `docker logs -t` -View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.5` +View the logs of `ghcr.io/huggingface/tgi-gaudi:2.0.6` `docker logs 05c40b636239 -t` @@ -97,7 +97,7 @@ So just make sure the devices are available. Here is another failure example: ``` -f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.5 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server +f7a08f9867f9 ghcr.io/huggingface/tgi-gaudi:2.0.6 "text-generation-lau…" 16 seconds ago Exited (2) 14 seconds ago tgi-gaudi-server ``` Check the log by `docker logs f7a08f9867f9 -t`.
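 To speed up this kind of triage, a helper along the following lines can list every exited container and dump its most recent log lines. This is an illustrative sketch only, not a script shipped with the repository: ```bash # Print the last 20 log lines of every exited container. for cid in $(docker ps -aq --filter "status=exited"); do echo "=== $cid ==="; docker logs --tail 20 "$cid" 2>&1; done ```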
@@ -114,7 +114,7 @@ View the docker input parameters in `./ChatQnA/docker_compose/intel/hpu/gaudi/co ``` tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8008:80" diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md index 5cd8d3ef08..fc647a5552 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/README.md +++ b/ChatQnA/docker_compose/nvidia/gpu/README.md @@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy" export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" ``` @@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service ``` 3. Set up other environment variables: @@ -95,9 +95,9 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op ```bash git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/ChatQnA/docker +cd GenAIExamples/ChatQnA docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -cd ../../.. +cd ../.. ``` ### 5. Build UI Docker Image @@ -107,7 +107,7 @@ Construct the frontend Docker image using the command below: ```bash cd GenAIExamples/ChatQnA/ui docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . -cd ../../../.. +cd ../../../ ``` ### 6. Build React UI Docker Image (Optional) @@ -117,7 +117,7 @@ Construct the frontend Docker image using the command below: ```bash cd GenAIExamples/ChatQnA/ui docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . -cd ../../../.. +cd ../../.. ``` ### 7. Build Nginx Docker Image @@ -156,8 +156,6 @@ Change the `xxx_MODEL_ID` below for your needs. ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy" export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} @@ -168,6 +166,8 @@ Change the `xxx_MODEL_ID` below for your needs. ```bash export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service ``` 3. Set up other environment variables: @@ -238,9 +238,9 @@ docker compose up -d Then try the `cURL` command below to validate TGI. 
```bash - curl http://${host_ip}:8008/generate \ + curl http://${host_ip}:9009/v1/chat/completions \ -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ + -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ -H 'Content-Type: application/json' ``` diff --git a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml index c35866b101..ba504c2eb3 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml +++ b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml @@ -20,10 +20,10 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} - REDIS_HOST: ${REDIS_HOST} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + TEI_ENDPOINT: http://tei-embedding-service:80 HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 @@ -39,13 +39,6 @@ services: http_proxy: ${http_proxy} https_proxy: ${https_proxy} command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} container_name: retriever-redis-server @@ -58,12 +51,13 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - REDIS_URL: ${REDIS_URL} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db INDEX_NAME: ${INDEX_NAME} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + image: ghcr.io/huggingface/text-embeddings-inference:1.5 container_name: tei-reranking-server ports: - "8808:80" @@ -123,11 +117,14 @@ services: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP} - - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP} - - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=chaqna-backend-server + - EMBEDDING_SERVER_HOST_IP=tei-embedding-service + - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} + - RETRIEVER_SERVICE_HOST_IP=retriever + - RERANK_SERVER_HOST_IP=tei-reranking-service + - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} + - LLM_SERVER_HOST_IP=tgi-service + - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80} ipc: host restart: always chaqna-ui-server: diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 3902313208..7be5141ead 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -53,6 +53,12 @@ services: dockerfile: comps/retrievers/qdrant/haystack/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest} + retriever-pinecone: + build: + context: GenAIComps + dockerfile: comps/retrievers/pinecone/langchain/Dockerfile + extends: chatqna + image: ${REGISTRY:-opea}/retriever-pinecone:${TAG:-latest} reranking-tei: build: context: GenAIComps @@ -77,24 +83,6 @@ services: dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest} - llm-vllm-hpu: - 
build: - context: GenAIComps - dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu - extends: chatqna - image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest} - llm-vllm-ray: - build: - context: GenAIComps - dockerfile: comps/llms/text-generation/vllm/ray/Dockerfile - extends: chatqna - image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest} - llm-vllm-ray-hpu: - build: - context: GenAIComps - dockerfile: comps/llms/text-generation/vllm/ray/dependency/Dockerfile - extends: chatqna - image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest} dataprep-redis: build: context: GenAIComps @@ -107,6 +95,12 @@ services: dockerfile: comps/dataprep/qdrant/langchain/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest} + dataprep-pinecone: + build: + context: GenAIComps + dockerfile: comps/dataprep/pinecone/langchain/Dockerfile + extends: chatqna + image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest} guardrails-tgi: build: context: GenAIComps @@ -119,6 +113,12 @@ services: dockerfile: Dockerfile.cpu extends: chatqna image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + vllm-hpu: + build: + context: vllm-fork + dockerfile: Dockerfile.hpu + extends: chatqna + image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest} nginx: build: context: GenAIComps diff --git a/ChatQnA/kubernetes/intel/README_gmc.md b/ChatQnA/kubernetes/intel/README_gmc.md index dab86381fe..2c849c5079 100644 --- a/ChatQnA/kubernetes/intel/README_gmc.md +++ b/ChatQnA/kubernetes/intel/README_gmc.md @@ -18,14 +18,15 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - retriever: opea/retriever-redis:latest - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 -- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu +- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - chaqna-xeon-backend-server: opea/chatqna:latest Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services. For Gaudi: -- tei-embedding-service: ghcr.io/huggingface/tei-gaudi:latest -- tgi-service: gghcr.io/huggingface/tgi-gaudi:2.0.5 +- tei-embedding-service: ghcr.io/huggingface/tei-gaudi:1.5.0 +- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6 + > [NOTE] > Please refer to [Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md) or [Gaudi README](https://github.com/opea-project/GenAIExamples/blob/main/ChatQnA/docker_compose/intel/hpu/gaudi/README.md) to build the OPEA images. These too will be available on Docker Hub soon to simplify use. diff --git a/ChatQnA/kubernetes/intel/README_single_node.md b/ChatQnA/kubernetes/intel/README_single_node.md new file mode 100644 index 0000000000..411f4cfdd6 --- /dev/null +++ b/ChatQnA/kubernetes/intel/README_single_node.md @@ -0,0 +1,53 @@ +# Deploy ChatQnA in Kubernetes Cluster on Single Node Environment (Minikube) + +The following instructions are to deploy the ChatQnA example on a single node using Kubernetes for testing purposes. + +## Minikube setup +1. Install [Minikube](https://minikube.sigs.k8s.io/docs/start/) following the quickstart guide +2. Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/) +3.
Build the container images, following the steps under the "Build Docker Images" section in the [docker-compose README](../../docker_compose/intel/cpu/xeon/README.md), to check out [GenAIComps](https://github.com/opea-project/GenAIComps.git) and build the other images with your changes for development. +```bash +# Example on building frontend Docker image +cd GenAIExamples/ChatQnA/ui +docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . +# etc... +``` +The built images should be visible in the local Docker registry. Other images which have not been built with your changes (or are not present in your local Docker registry) will be pulled from [docker hub](https://hub.docker.com/u/opea) by Minikube later in step 6. +```bash +docker images | grep opea +# REPOSITORY TAG IMAGE ID CREATED SIZE +# opea/chatqna-ui latest 8f2fa2523b85 6 days ago 1.56GB +# opea/chatqna latest 7f2602a7a266 6 days ago 821MB +# ... +``` +4. The built images must be imported into the Minikube registry from the local Docker registry. This can be done using `minikube image load`. +```bash +minikube image load opea/chatqna +minikube image load opea/chatqna-ui +# etc... +``` +5. Start the minikube cluster with `minikube start`, then check that the minikube container (kicbase) is up with `docker ps` +```bash +docker ps +# CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +# de088666cef2 gcr.io/k8s-minikube/kicbase:v0.0.45 "/usr/local/bin/entr…" 2 days ago Up 2 days 127.0.0.1:49157->22/tcp... minikube +``` +6. Deploy the ChatQnA application with `kubectl apply -f chatqna.yaml`, then check that the opea pods are in a running state with `kubectl get pods` +```bash +kubectl get pods +# NAME READY STATUS RESTARTS AGE +# chatqna-78b4f5865-qbzms 1/1 Running 0 2d3h +# chatqna-chatqna-ui-54c8dfb6cf-fll5g 1/1 Running 0 2d3h +# etc... +``` + +7.
Forward the port of the chatqna service from Minikube to the host, and test the service as you would a normal k8s cluster deployment +```bash +# port-forward to expose the chatqna endpoint from within the minikube cluster +kubectl port-forward svc/chatqna 8888:8888 +curl http://localhost:8888/v1/chatqna \ + -H 'Content-Type: application/json' \ + -d '{"messages": "What is the revenue of Nike in 2023?"}' + +# Similarly port-forward to expose the chatqna-ui endpoint and use the UI at :5173 in your browser +kubectl port-forward svc/chatqna-chatqna-ui 5173:5173 +``` diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml index 08752a8da6..7265ebff5d 100644 --- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml +++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml @@ -554,7 +554,7 @@ spec: securityContext: {} image: "opea/chatqna-ui:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: ui containerPort: 5173 @@ -612,7 +612,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/dataprep-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: data-prep containerPort: 6007 @@ -687,7 +687,7 @@ spec: seccompProfile: type: RuntimeDefault image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /data name: data-volume @@ -762,7 +762,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/guardrails-tgi:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: guardrails-usvc containerPort: 9090 @@ -840,7 +840,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/retriever-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: retriever-usvc containerPort: 7000 @@ -919,7 +919,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -1010,7 +1010,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -1100,8 +1100,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" - imagePullPolicy: IfNotPresent + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -1180,8 +1180,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" - imagePullPolicy: IfNotPresent + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -1252,18 +1252,12 @@ spec: env: - name: LLM_SERVER_HOST_IP value: chatqna-tgi - - name: LLM_SERVER_PORT - value: "2080" - name: RERANK_SERVER_HOST_IP value: chatqna-teirerank - - name: RERANK_SERVER_PORT - value: "2082" - name: RETRIEVER_SERVICE_HOST_IP value: chatqna-retriever-usvc - name: EMBEDDING_SERVER_HOST_IP value: chatqna-tei - - name: EMBEDDING_SERVER_PORT - value: "2081" - name: GUARDRAIL_SERVICE_HOST_IP value: chatqna-guardrails-usvc - name: GUARDRAIL_SERVICE_PORT @@ -1279,7 +1273,7 @@ spec: 
seccompProfile: type: RuntimeDefault image: "opea/chatqna-guardrails:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /tmp name: tmp @@ -1320,7 +1314,7 @@ spec: spec: containers: - image: nginx:1.27.1 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always name: nginx volumeMounts: - mountPath: /etc/nginx/conf.d diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml index 22155dfad6..26813816ed 100644 --- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml +++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml @@ -454,7 +454,7 @@ spec: securityContext: {} image: "opea/chatqna-ui:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: ui containerPort: 5173 @@ -512,7 +512,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/dataprep-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: data-prep containerPort: 6007 @@ -587,7 +587,7 @@ spec: seccompProfile: type: RuntimeDefault image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /data name: data-volume @@ -662,7 +662,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/retriever-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: retriever-usvc containerPort: 7000 @@ -741,7 +741,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -832,7 +832,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -922,8 +922,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" - imagePullPolicy: IfNotPresent + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -994,18 +994,12 @@ spec: env: - name: LLM_SERVER_HOST_IP value: chatqna-tgi - - name: LLM_SERVER_PORT - value: "2080" - name: RERANK_SERVER_HOST_IP value: chatqna-teirerank - - name: RERANK_SERVER_PORT - value: "2082" - name: RETRIEVER_SERVICE_HOST_IP value: chatqna-retriever-usvc - name: EMBEDDING_SERVER_HOST_IP value: chatqna-tei - - name: EMBEDDING_SERVER_PORT - value: "2081" securityContext: allowPrivilegeEscalation: false capabilities: @@ -1017,7 +1011,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/chatqna:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /tmp name: tmp @@ -1058,7 +1052,7 @@ spec: spec: containers: - image: nginx:1.27.1 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always name: nginx volumeMounts: - mountPath: /etc/nginx/conf.d diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml index 9eba55f9f9..aac57140b7 100644 --- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml +++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml @@ -455,7 +455,7 @@ spec: securityContext: {} image: "opea/chatqna-ui:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: ui containerPort: 5173 @@ -513,7 +513,7 @@ 
spec: seccompProfile: type: RuntimeDefault image: "opea/dataprep-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: data-prep containerPort: 6007 @@ -588,7 +588,7 @@ spec: seccompProfile: type: RuntimeDefault image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /data name: data-volume @@ -663,7 +663,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/retriever-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: retriever-usvc containerPort: 7000 @@ -742,7 +742,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -833,7 +833,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -925,8 +925,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" - imagePullPolicy: IfNotPresent + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -997,18 +997,12 @@ spec: env: - name: LLM_SERVER_HOST_IP value: chatqna-tgi - - name: LLM_SERVER_PORT - value: "2080" - name: RERANK_SERVER_HOST_IP value: chatqna-teirerank - - name: RERANK_SERVER_PORT - value: "2082" - name: RETRIEVER_SERVICE_HOST_IP value: chatqna-retriever-usvc - name: EMBEDDING_SERVER_HOST_IP value: chatqna-tei - - name: EMBEDDING_SERVER_PORT - value: "2081" securityContext: allowPrivilegeEscalation: false capabilities: @@ -1020,7 +1014,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/chatqna:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /tmp name: tmp @@ -1061,7 +1055,7 @@ spec: spec: containers: - image: nginx:1.27.1 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always name: nginx volumeMounts: - mountPath: /etc/nginx/conf.d diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml index dd4ec145d4..a802889f8b 100644 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml @@ -556,7 +556,7 @@ spec: securityContext: {} image: "opea/chatqna-ui:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: ui containerPort: 5173 @@ -614,7 +614,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/dataprep-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: data-prep containerPort: 6007 @@ -692,7 +692,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/guardrails-tgi:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: guardrails-usvc containerPort: 9090 @@ -767,7 +767,7 @@ spec: seccompProfile: type: RuntimeDefault image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /data name: data-volume @@ -842,7 +842,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/retriever-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: retriever-usvc containerPort: 7000 @@ 
-920,7 +920,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tei-gaudi:latest" + image: "ghcr.io/huggingface/tei-gaudi:1.5.0" imagePullPolicy: IfNotPresent args: - "--auto-truncate" @@ -1013,7 +1013,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -1103,8 +1103,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" - imagePullPolicy: IfNotPresent + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -1184,8 +1184,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" - imagePullPolicy: IfNotPresent + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -1257,18 +1262,12 @@ spec: env: - name: LLM_SERVER_HOST_IP value: chatqna-tgi - - name: LLM_SERVER_PORT - value: "2080" - name: RERANK_SERVER_HOST_IP value: chatqna-teirerank - - name: RERANK_SERVER_PORT - value: "2082" - name: RETRIEVER_SERVICE_HOST_IP value: chatqna-retriever-usvc - name: EMBEDDING_SERVER_HOST_IP value: chatqna-tei - - name: EMBEDDING_SERVER_PORT - value: "2081" - name: GUARDRAIL_SERVICE_HOST_IP value: chatqna-guardrails-usvc - name: GUARDRAIL_SERVICE_PORT @@ -1284,7 +1283,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/chatqna-guardrails:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /tmp name: tmp @@ -1325,7 +1324,7 @@ spec: spec: containers: - image: nginx:1.27.1 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always name: nginx volumeMounts: - mountPath: /etc/nginx/conf.d diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml index 988f48ca26..949e7cd8ea 100644 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml @@ -592,7 +592,7 @@ spec: securityContext: {} image: "opea/chatqna-ui:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: ui containerPort: 5173 @@ -650,7 +650,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/dataprep-redis:v0.9" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: data-prep containerPort: 6007 @@ -728,7 +728,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/embedding-tei:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: embedding-usvc containerPort: 6000 @@ -806,7 +806,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/llm-vllm:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: llm-uservice containerPort: 9000 @@ -881,7 +881,7 @@ spec: seccompProfile: type: RuntimeDefault image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /data name: data-volume @@ -956,7 +956,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/reranking-tei:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: reranking-usvc containerPort: 8000 @@ -1034,7 +1034,7 @@ spec:
seccompProfile: type: RuntimeDefault image: "opea/retriever-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: retriever-usvc containerPort: 7000 @@ -1106,7 +1106,7 @@ spec: privileged: true capabilities: add: ["SYS_NICE"] - image: "ghcr.io/huggingface/tei-gaudi:latest" + image: "ghcr.io/huggingface/tei-gaudi:1.5.0" imagePullPolicy: IfNotPresent args: - "--auto-truncate" @@ -1193,7 +1193,7 @@ spec: securityContext: {} image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -1281,7 +1281,7 @@ spec: - | export VLLM_CPU_KVCACHE_SPACE=40 && \ python3 -m vllm.entrypoints.openai.api_server --enforce-eager --gpu-memory-utilization 0.5 --dtype auto --model $MODEL_ID --port 2080 --tensor-parallel-size 8 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -1363,7 +1363,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/chatqna:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /tmp name: tmp @@ -1404,7 +1404,7 @@ spec: spec: containers: - image: nginx:1.27.1 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always name: nginx volumeMounts: - mountPath: /etc/nginx/conf.d diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml index 56d8720b9a..7c31d09d67 100644 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml +++ b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml @@ -455,7 +455,7 @@ spec: securityContext: {} image: "opea/chatqna-ui:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: ui containerPort: 5173 @@ -513,7 +513,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/dataprep-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: data-prep containerPort: 6007 @@ -588,7 +588,7 @@ spec: seccompProfile: type: RuntimeDefault image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /data name: data-volume @@ -663,7 +663,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/retriever-redis:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: retriever-usvc containerPort: 7000 @@ -741,7 +741,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tei-gaudi:latest" + image: "ghcr.io/huggingface/tei-gaudi:1.5.0" imagePullPolicy: IfNotPresent args: - "--auto-truncate" @@ -834,7 +834,7 @@ spec: seccompProfile: type: RuntimeDefault image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always args: - "--auto-truncate" volumeMounts: @@ -924,8 +924,8 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" - imagePullPolicy: IfNotPresent + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" + imagePullPolicy: Always volumeMounts: - mountPath: /data name: model-volume @@ -997,18 +997,12 @@ spec: env: - name: LLM_SERVER_HOST_IP value: chatqna-tgi - - name: LLM_SERVER_PORT - value: "2080" - name: RERANK_SERVER_HOST_IP value: chatqna-teirerank - - name: RERANK_SERVER_PORT - value: "2082" - name: RETRIEVER_SERVICE_HOST_IP value: chatqna-retriever-usvc - name: EMBEDDING_SERVER_HOST_IP value: 
chatqna-tei - - name: EMBEDDING_SERVER_PORT - value: "2081" securityContext: allowPrivilegeEscalation: false capabilities: @@ -1020,7 +1014,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/chatqna:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /tmp name: tmp @@ -1061,7 +1055,7 @@ spec: spec: containers: - image: nginx:1.27.1 - imagePullPolicy: IfNotPresent + imagePullPolicy: Always name: nginx volumeMounts: - mountPath: /etc/nginx/conf.d diff --git a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh index fcc3f80416..c186d64345 100644 --- a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh @@ -22,9 +22,9 @@ function build_docker_images() { service_list="chatqna-guardrails chatqna-ui dataprep-redis retriever-redis guardrails-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index 1d5b8bc8a7..23c302e8c9 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -22,9 +22,9 @@ function build_docker_images() { service_list="chatqna chatqna-ui dataprep-redis retriever-redis nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh index f906dfabbf..3535159b3f 100644 --- a/ChatQnA/tests/test_compose_on_xeon.sh +++ b/ChatQnA/tests/test_compose_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh new file mode 100755 index 0000000000..a95b90c160 --- /dev/null +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -0,0 +1,233 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH/docker_image_build + git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git 
checkout "${opea_branch:-"main"}" && cd ../ + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="chatqna chatqna-ui dataprep-pinecone retriever-pinecone nginx" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export no_proxy=${no_proxy},${ip_address} + export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + export RERANK_MODEL_ID="BAAI/bge-reranker-base" + export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" + export PINECONE_API_KEY=${PINECONE_KEY_LANGCHAIN_TEST} + export PINECONE_INDEX_NAME="langchain-test" + export INDEX_NAME="langchain-test" + export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + + # Start Docker Containers + docker compose -f compose_pinecone.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 500 ]]; do + docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log + if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then + break + fi + sleep 1s + n=$((n+1)) + done +} + +function validate_service() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + echo "Response" + echo $RESPONSE_BODY + echo "Expected Result" + echo $EXPECTED_RESULT + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. 
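+    # Each validate_service call below curls one endpoint, checks for HTTP 200,
+    # and greps the response body for an expected substring (see validate_service
+    # above). Services are checked in dependency order: embedding, dataprep,
+    # retriever, reranking, then the LLM.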
+ + # tei for embedding service + validate_service \ + "${ip_address}:6006/embed" \ + "[[" \ + "tei-embedding" \ + "tei-embedding-server" \ + '{"inputs":"What is Deep Learning?"}' + + sleep 1m # retrieval can't curl as expected, try to wait for more time + + # test /v1/dataprep/delete_file + validate_service \ + "http://${ip_address}:6009/v1/dataprep/delete_file" \ + '{"status":true}' \ + "dataprep_del" \ + "dataprep-pinecone-server" + + + # test /v1/dataprep upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt + validate_service \ + "http://${ip_address}:6007/v1/dataprep" \ + "Data preparation succeeded" \ + "dataprep_upload_file" \ + "dataprep-pinecone-server" + + + # retrieval microservice + test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + validate_service \ + "${ip_address}:7000/v1/retrieval" \ + " " \ + "retrieval" \ + "retriever-pinecone-server" \ + "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" + + # tei for rerank microservice + echo "Validating reranking service" + validate_service \ + "${ip_address}:8808/rerank" \ + '{"index":1,"score":' \ + "tei-rerank" \ + "tei-reranking-server" \ + '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' + + + # tgi for llm service + echo "Validating llm service" + validate_service \ + "${ip_address}:9009/generate" \ + "generated_text" \ + "tgi-llm" \ + "tgi-service" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_service \ + "${ip_address}:8888/v1/chatqna" \ + "data: " \ + "chatqna-megaservice" \ + "chatqna-xeon-backend-server" \ + '{"messages": "What is the revenue of Nike in 2023?"}' + +} + +function validate_frontend() { + echo "[ TEST INFO ]: --------- frontend test started ---------" + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + echo "[ TEST INFO ]: --------- conda env activated ---------" + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + echo "In stop docker" + echo $WORKPATH + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f compose_pinecone.yaml down +} + +function main() { + + stop_docker + + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + + start_time=$(date +%s) + start_services + end_time=$(date +%s) + duration=$((end_time-start_time)) + echo "Mega service start duration is $duration s" && sleep 1s + + if [ "${mode}" == "perf" ]; then + python3 $WORKPATH/tests/chatqna_benchmark.py + elif [ "${mode}" == "" ]; then + validate_microservices + echo "==== microservices validated ====" + validate_megaservice + echo "==== megaservice validated ====" + fi + + stop_docker + echo y | docker system prune + +} + +main diff --git a/ChatQnA/tests/test_compose_vllm_on_gaudi.sh b/ChatQnA/tests/test_compose_vllm_on_gaudi.sh index de6cd50ede..26bef067db 100644 --- a/ChatQnA/tests/test_compose_vllm_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_vllm_on_gaudi.sh @@ -17,13 +17,14 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH/docker_image_build git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ + git clone https://github.com/HabanaAI/vllm-fork.git echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu nginx" + service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm-hpu nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/ChatQnA/tests/test_compose_vllm_on_xeon.sh b/ChatQnA/tests/test_compose_vllm_on_xeon.sh index b664a6af8c..f53fd3aeaa 100644 --- a/ChatQnA/tests/test_compose_vllm_on_xeon.sh +++ b/ChatQnA/tests/test_compose_vllm_on_xeon.sh @@ -23,7 +23,7 @@ function build_docker_images() { service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s diff --git a/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh b/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh deleted file mode 100644 index d7d1dbe6bf..0000000000 --- a/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh +++ /dev/null @@ -1,183 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -e -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH/docker_image_build - git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ - - 
echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-ray-hpu nginx" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest - docker images && sleep 1s -} - -function start_services() { - - cd $WORKPATH/docker_compose/intel/hpu/gaudi - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export RERANK_MODEL_ID="BAAI/bge-reranker-base" - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - - # Start Docker Containers - docker compose -f compose_vllm_ray.yaml up -d > ${LOG_PATH}/start_services_with_compose.log - n=0 - until [[ "$n" -ge 100 ]]; do - echo "n=$n" - docker logs vllm-ray-gaudi-server > vllm_ray_service_start.log - if grep -q "Warmup finished" vllm_ray_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # tei for embedding service - validate_services \ - "${ip_address}:8090/embed" \ - "\[\[" \ - "tei-embedding" \ - "tei-embedding-gaudi-server" \ - '{"inputs":"What is Deep Learning?"}' - - sleep 1m # retrieval can't curl as expected, try to wait for more time - - # retrieval microservice - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - " " \ - "retrieval" \ - "retriever-redis-server" \ - "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" - - # tei for rerank microservice - validate_services \ - "${ip_address}:8808/rerank" \ - '{"index":1,"score":' \ - "tei-rerank" \ - "tei-reranking-gaudi-server" \ - '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' - - # vllm-on-ray for llm service - validate_services \ - "${ip_address}:8006/v1/chat/completions" \ - "content" \ - "vllm-ray-llm" \ - "vllm-ray-gaudi-server" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' -} - -function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/chatqna" \ - "data: " \ - "mega-chatqna" \ - "chatqna-gaudi-backend-server" \ - '{"messages": "What is the revenue of Nike in 2023?"}' - -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose_vllm_ray.yaml down -} - -function main() { - - stop_docker - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_time=$(date +%s) - start_services - end_time=$(date +%s) - duration=$((end_time-start_time)) - echo "Mega service start duration is $duration s" - - validate_microservices - validate_megaservice - # validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh index 22c5e8c947..f06a189ef7 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh @@ -22,9 +22,9 @@ function build_docker_images() { service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever-redis nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh index b0ffc22bcd..89b4922617 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="chatqna-without-rerank chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker images && sleep 1s diff --git a/ChatQnA/tests/test_manifest_on_gaudi.sh b/ChatQnA/tests/test_manifest_on_gaudi.sh index 2716efa4b0..8bcccab377 100755 --- a/ChatQnA/tests/test_manifest_on_gaudi.sh +++ b/ChatQnA/tests/test_manifest_on_gaudi.sh @@ -111,7 +111,7 @@ function _cleanup_ns() { function install_and_validate_chatqna_guardrail() { echo "Testing manifests chatqna_guardrils" - local ns=${NAMESPACE}-gaurdrails + local ns=${NAMESPACE} _cleanup_ns $ns kubectl create namespace $ns # install guardrail @@ -119,10 +119,9 @@ function install_and_validate_chatqna_guardrail() { # Sleep enough time for chatqna_guardrail to be ready sleep 60 if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then - echo "Waiting for cahtqna_guardrail pod ready done!" + echo "Waiting for chatqna_guardrail pod ready done!" else echo "Timeout waiting for chatqna_guardrail pod ready!" - _cleanup_ns $ns exit 1 fi @@ -130,10 +129,8 @@ function install_and_validate_chatqna_guardrail() { validate_chatqna $ns chatqna-guardrails local ret=$? 
if [ $ret -ne 0 ]; then - _cleanup_ns $ns exit 1 fi - _cleanup_ns $ns } if [ $# -eq 0 ]; then @@ -161,8 +158,7 @@ case "$1" in if [ $ret -ne 0 ]; then exit $ret fi - pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifests - set +e + pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifest install_and_validate_chatqna_guardrail popd ;; diff --git a/ChatQnA/tests/test_manifest_on_xeon.sh b/ChatQnA/tests/test_manifest_on_xeon.sh index d913421a63..d405df9776 100755 --- a/ChatQnA/tests/test_manifest_on_xeon.sh +++ b/ChatQnA/tests/test_manifest_on_xeon.sh @@ -40,7 +40,7 @@ function get_end_point() { function validate_chatqna() { local ns=$1 local log=$2 - max_retry=20 + max_retry=10 # make sure microservice retriever-usvc is ready # try to curl retriever-svc for max_retry times test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") @@ -111,7 +111,7 @@ function _cleanup_ns() { function install_and_validate_chatqna_guardrail() { echo "Testing manifests chatqna_guardrils" - local ns=${NAMESPACE}-gaurdrails + local ns=${NAMESPACE} _cleanup_ns $ns kubectl create namespace $ns # install guardrail @@ -119,10 +119,9 @@ function install_and_validate_chatqna_guardrail() { # Sleep enough time for chatqna_guardrail to be ready sleep 60 if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then - echo "Waiting for cahtqna_guardrail pod ready done!" + echo "Waiting for chatqna_guardrail pod ready done!" else echo "Timeout waiting for chatqna_guardrail pod ready!" - _cleanup_ns $ns exit 1 fi @@ -130,10 +129,8 @@ function install_and_validate_chatqna_guardrail() { validate_chatqna $ns chatqna-guardrails local ret=$? if [ $ret -ne 0 ]; then - _cleanup_ns $ns exit 1 fi - _cleanup_ns $ns } if [ $# -eq 0 ]; then @@ -161,8 +158,7 @@ case "$1" in if [ $ret -ne 0 ]; then exit $ret fi - pushd ChatQnA/kubernetes/intel/cpu/xeon/manifests - set +e + pushd ChatQnA/kubernetes/intel/cpu/xeon/manifest install_and_validate_chatqna_guardrail popd ;; diff --git a/ChatQnA/ui/docker/Dockerfile b/ChatQnA/ui/docker/Dockerfile index ac2bb7da31..1d5115f4b5 100644 --- a/ChatQnA/ui/docker/Dockerfile +++ b/ChatQnA/ui/docker/Dockerfile @@ -23,4 +23,4 @@ RUN npm run build EXPOSE 5173 # Run the front-end application in preview mode -CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] \ No newline at end of file +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/ChatQnA/ui/docker/Dockerfile.react b/ChatQnA/ui/docker/Dockerfile.react index 49bc13124c..18afc393ad 100644 --- a/ChatQnA/ui/docker/Dockerfile.react +++ b/ChatQnA/ui/docker/Dockerfile.react @@ -18,4 +18,4 @@ COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html COPY ./react/env.sh /docker-entrypoint.d/env.sh COPY ./react/nginx.conf /etc/nginx/conf.d/default.conf -RUN chmod +x /docker-entrypoint.d/env.sh \ No newline at end of file +RUN chmod +x /docker-entrypoint.d/env.sh diff --git a/ChatQnA/ui/react/.env b/ChatQnA/ui/react/.env index e5d52f4213..ae0bd3732c 100644 --- a/ChatQnA/ui/react/.env +++ b/ChatQnA/ui/react/.env @@ -1,2 +1,2 @@ VITE_BACKEND_SERVICE_ENDPOINT=http://backend_address:8888/v1/chatqna -VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep \ No newline at end of file +VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep diff --git a/ChatQnA/ui/react/.env.production b/ChatQnA/ui/react/.env.production index a46e1e3850..9922d60127 100644 --- a/ChatQnA/ui/react/.env.production +++ 
b/ChatQnA/ui/react/.env.production @@ -1,2 +1,2 @@ VITE_BACKEND_SERVICE_ENDPOINT=APP_BACKEND_SERVICE_ENDPOINT -VITE_DATA_PREP_SERVICE_URL=APP_DATA_PREP_SERVICE_URL \ No newline at end of file +VITE_DATA_PREP_SERVICE_URL=APP_DATA_PREP_SERVICE_URL diff --git a/ChatQnA/ui/react/nginx.conf b/ChatQnA/ui/react/nginx.conf index 00433fcda7..01aef12751 100644 --- a/ChatQnA/ui/react/nginx.conf +++ b/ChatQnA/ui/react/nginx.conf @@ -17,4 +17,4 @@ server { expires 1d; } } -} \ No newline at end of file +} diff --git a/ChatQnA/ui/react/public/vite.svg b/ChatQnA/ui/react/public/vite.svg index e7b8dfb1b2..ee9fadaf9c 100644 --- a/ChatQnA/ui/react/public/vite.svg +++ b/ChatQnA/ui/react/public/vite.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/ChatQnA/ui/react/src/assets/react.svg b/ChatQnA/ui/react/src/assets/react.svg index 6c87de9bb3..8e0e0f15c0 100644 --- a/ChatQnA/ui/react/src/assets/react.svg +++ b/ChatQnA/ui/react/src/assets/react.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/ChatQnA/ui/react/src/components/Conversation/DataSource.tsx b/ChatQnA/ui/react/src/components/Conversation/DataSource.tsx index cb7b326c9b..6f1b2ab06b 100644 --- a/ChatQnA/ui/react/src/components/Conversation/DataSource.tsx +++ b/ChatQnA/ui/react/src/components/Conversation/DataSource.tsx @@ -68,4 +68,4 @@ export default function DataSource({ opened, onClose }: Props) { ) -} \ No newline at end of file +} diff --git a/ChatQnA/ui/react/src/components/UserInfoModal/UserInfoModal.tsx b/ChatQnA/ui/react/src/components/UserInfoModal/UserInfoModal.tsx index 4d54180a45..4540bd4c96 100644 --- a/ChatQnA/ui/react/src/components/UserInfoModal/UserInfoModal.tsx +++ b/ChatQnA/ui/react/src/components/UserInfoModal/UserInfoModal.tsx @@ -45,4 +45,4 @@ const UserInfoModal = () => { ) } -export default UserInfoModal \ No newline at end of file +export default UserInfoModal diff --git a/ChatQnA/ui/svelte/src/app.postcss b/ChatQnA/ui/svelte/src/app.postcss index 1bb14630c8..963bbca4ef 100644 --- a/ChatQnA/ui/svelte/src/app.postcss +++ b/ChatQnA/ui/svelte/src/app.postcss @@ -83,4 +83,4 @@ a.btn { .w-12\/12 { width: 100% -} \ No newline at end of file +} diff --git a/ChatQnA/ui/svelte/src/lib/assets/voice/svg/paste.svg b/ChatQnA/ui/svelte/src/lib/assets/voice/svg/paste.svg index 9fe89acc1f..8910f0ea64 100644 --- a/ChatQnA/ui/svelte/src/lib/assets/voice/svg/paste.svg +++ b/ChatQnA/ui/svelte/src/lib/assets/voice/svg/paste.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/ChatQnA/ui/svelte/src/lib/assets/voice/svg/uploadFile.svg b/ChatQnA/ui/svelte/src/lib/assets/voice/svg/uploadFile.svg index 362a6994eb..9a77286a8f 100644 --- a/ChatQnA/ui/svelte/src/lib/assets/voice/svg/uploadFile.svg +++ b/ChatQnA/ui/svelte/src/lib/assets/voice/svg/uploadFile.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/ChatQnA/ui/svelte/tests/test_file.txt b/ChatQnA/ui/svelte/tests/test_file.txt index 93fc5da94e..bfbd73c1c3 100644 --- a/ChatQnA/ui/svelte/tests/test_file.txt +++ b/ChatQnA/ui/svelte/tests/test_file.txt @@ -101,4 +101,4 @@ Terms of Use Privacy Sitemap Copyright © 2003 - 2023. All rights reserved. 
-CTATECH-PROD2 \ No newline at end of file +CTATECH-PROD2 diff --git a/CodeGen/Dockerfile b/CodeGen/Dockerfile index aee27a8989..e0aa7d13f0 100644 --- a/CodeGen/Dockerfile +++ b/CodeGen/Dockerfile @@ -20,7 +20,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt COPY ./codegen.py /home/user/codegen.py diff --git a/CodeGen/README.md b/CodeGen/README.md index 03288fb2df..013c31d373 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -85,12 +85,12 @@ Currently we support two ways of deploying ChatQnA services with docker compose: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------------ | ------------------------------------------------------------------------------- | -| LLM_MODEL_ID | [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) | +| Service | Model | +| ------------ | --------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | -[meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) is a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. -Change the `LLM_MODEL_ID` below for your needs, such as: [Qwen/CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat), [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) +[Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) may be a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. +Change the `LLM_MODEL_ID` below for your needs, such as: [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) If you choose to use `meta-llama/CodeLlama-7b-hf` as LLM model, you will need to visit [here](https://huggingface.co/meta-llama/CodeLlama-7b-hf), click the `Expand to review and access` button to ask for model access. diff --git a/CodeGen/benchmark/performance/README.md b/CodeGen/benchmark/performance/README.md new file mode 100644 index 0000000000..a9d1e9d5f6 --- /dev/null +++ b/CodeGen/benchmark/performance/README.md @@ -0,0 +1,77 @@ +# CodeGen Benchmarking + +This folder contains a collection of scripts to enable inference benchmarking by leveraging a comprehensive benchmarking tool, [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), that enables throughput analysis to assess inference performance. + +By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community. + +## Purpose + +We aim to run these benchmarks and share them with the OPEA community for three primary reasons: + +- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs. +- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case. 
+- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks, etc.
+
+## Metrics
+
+The benchmark reports the following metrics:
+
+- Number of Concurrent Requests
+- End-to-End Latency: P50, P90, P99 (in milliseconds)
+- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
+- Average Next Token Latency (in milliseconds)
+- Average Token Latency (in milliseconds)
+- Requests Per Second (RPS)
+- Output Tokens Per Second
+- Input Tokens Per Second
+
+Results are displayed in the terminal and saved as a CSV file named `1_testspec.yaml`.
+
+## Getting Started
+
+We recommend using Kubernetes to deploy the CodeGen service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs.
+
+### Prerequisites
+
+- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
+
+- Every node has direct internet access.
+- Set up kubectl on the master node with access to the Kubernetes cluster.
+- Install Python 3.8+ on the master node for running GenAIEval.
+- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
+- Ensure that the container's ulimit can meet the number of requests.
+
+```bash
+# How to modify the containerd ulimit:
+sudo systemctl edit containerd
+# Add two lines:
+[Service]
+LimitNOFILE=65536:1048576
+
+sudo systemctl daemon-reload; sudo systemctl restart containerd
+```
+
+### Test Steps
+
+Please deploy the CodeGen service before benchmarking.
+
+#### Run Benchmark Test
+
+Before the benchmark, we can configure the number of test queries and the test output directory:
+
+```bash
+export USER_QUERIES="[128, 128, 128, 128]"
+export TEST_OUTPUT_DIR="/tmp/benchmark_output"
+```
+
+And then run the benchmark:
+
+```bash
+bash benchmark.sh -n 1
+```
+
+The argument `-n` specifies the number of test nodes (1 in this example).
+
+#### Data Collection
+
+All test results are written to `/tmp/benchmark_output`, the folder configured by the `TEST_OUTPUT_DIR` environment variable in the previous step.
diff --git a/CodeGen/benchmark/performance/benchmark.sh b/CodeGen/benchmark/performance/benchmark.sh
new file mode 100644
index 0000000000..e1ab2dae86
--- /dev/null
+++ b/CodeGen/benchmark/performance/benchmark.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+deployment_type="k8s"
+node_number=1
+service_port=7778
+query_per_node=128
+
+benchmark_tool_path="$(pwd)/GenAIEval"
+
+usage() {
+  echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]"
+  echo "  -d deployment_type  deployment type, select between k8s and docker (default: ${deployment_type})"
+  echo "  -n node_number      Test node number, required only for k8s deployment_type, (default: ${node_number})"
+  echo "  -i service_ip       service ip, required only for docker deployment_type"
+  echo "  -p service_port     service port, required only for docker deployment_type, (default: ${service_port})"
+  exit 1
+}
+
+while getopts ":d:n:i:p:" opt; do
+  case ${opt} in
+    d )
+      deployment_type=$OPTARG
+      ;;
+    n )
+      node_number=$OPTARG
+      ;;
+    i )
+      service_ip=$OPTARG
+      ;;
+    p )
+      service_port=$OPTARG
+      ;;
+    \? )
+      echo "Invalid option: -$OPTARG" 1>&2
+      usage
+      ;;
+    : )
+      echo "Invalid option: -$OPTARG requires an argument" 1>&2
+      usage
+      ;;
+  esac
+done
+
+if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then
+  echo "Error: service_ip is required for docker deployment_type" 1>&2
+  usage
+fi
+
+if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then
+  echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
+fi
+
+function main() {
+  if [[ ! -d ${benchmark_tool_path} ]]; then
+    echo "Benchmark tool not found, setting up..."
+    setup_env
+  fi
+  run_benchmark
+}
+
+function setup_env() {
+  git clone https://github.com/opea-project/GenAIEval.git
+  pushd ${benchmark_tool_path}
+  python3 -m venv stress_venv
+  source stress_venv/bin/activate
+  pip install -r requirements.txt
+  popd
+}
+
+function run_benchmark() {
+  source ${benchmark_tool_path}/stress_venv/bin/activate
+  export DEPLOYMENT_TYPE=${deployment_type}
+  export SERVICE_IP=${service_ip:-"None"}
+  export SERVICE_PORT=${service_port:-"None"}
+  if [[ -z $USER_QUERIES ]]; then
+    user_query=$((query_per_node*node_number))
+    export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
+    echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
+  fi
+  export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//')
+  if [[ -z $WARMUP ]]; then export WARMUP=0; fi
+  if [[ -z $TEST_OUTPUT_DIR ]]; then
+    if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
+      export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
+    else
+      export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
+    fi
+    echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
+  fi
+
+  envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml
+  cd ${benchmark_tool_path}/evals/benchmark
+  python benchmark.py
+}
+
+main
diff --git a/CodeGen/benchmark/performance/benchmark.yaml b/CodeGen/benchmark/performance/benchmark.yaml
new file mode 100644
index 0000000000..90d74d02bf
--- /dev/null
+++ b/CodeGen/benchmark/performance/benchmark.yaml
@@ -0,0 +1,47 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+test_suite_config:  # Overall configuration settings for the test suite
+  examples: ["codegen"]  # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
+  deployment_type: "k8s"  # Default is "k8s", can also be "docker"
+  service_ip: None  # Leave as None for k8s, specify for Docker
+  service_port: None  # Leave as None for k8s, specify for Docker
+  warm_ups: 0  # Number of test requests for warm-up
+  run_time: 60m  # The max total run time for the test suite
+  seed:  # The seed for all RNGs
+  user_queries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048]  # Number of test requests at each concurrency level
+  query_timeout: 120  # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default.
+ random_prompt: false # Use random prompts if true, fixed prompts if false + collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false + data_visualization: false # Generate data visualization if true, do not generate data visualization if false + llm_model: "Qwen/CodeQwen1.5-7B-Chat" # The LLM model used for the test + test_output_dir: "/tmp/benchmark_output" # The directory to store the test output + load_shape: # Tenant concurrency pattern + name: constant # poisson or constant(locust default load shape) + params: # Loadshape-specific parameters + constant: # Constant load shape specific parameters, activate only if load_shape.name is constant + concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users + # arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate + poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson + arrival_rate: 1.0 # Request arrival rate + namespace: "" # Fill the user-defined namespace. Otherwise, it will be default. + +test_cases: + codegen: + llm: + run_test: true + service_name: "llm-dependency-svc" # Replace with your service name + parameters: + model_name: "Qwen/CodeQwen1.5-7B-Chat" + max_new_tokens: 128 + temperature: 0.01 + top_k: 10 + top_p: 0.95 + repetition_penalty: 1.03 + streaming: true + llmserve: + run_test: true + service_name: "llm-svc" # Replace with your service name + e2e: + run_test: true + service_name: "codegen-backend-svc" # Replace with your service name diff --git a/CodeGen/codegen.yaml b/CodeGen/codegen.yaml index 95f2d78e6a..8dc864f6f6 100644 --- a/CodeGen/codegen.yaml +++ b/CodeGen/codegen.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 8bdde1f755..5332d719a3 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -105,7 +105,7 @@ export your_no_proxy=${your_no_proxy},"External_Public_IP" export no_proxy=${your_no_proxy} export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} -export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index ab1e4150ce..64b74db71f 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "8028:80" diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 2a5040ea03..31cfad2929 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -85,7 +85,7 @@ Since the 
`compose.yaml` will consume some environment variables, you need to se
 export no_proxy=${your_no_proxy}
 export http_proxy=${your_http_proxy}
 export https_proxy=${your_http_proxy}
-export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
 export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export MEGA_SERVICE_HOST_IP=${host_ip}
diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
index 153b9f59a9..92b70b099c 100644
--- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -3,7 +3,7 @@
 
 services:
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
     ports:
       - "8028:80"
diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh
index d66a120af2..dba717b64a 100644
--- a/CodeGen/docker_compose/set_env.sh
+++ b/CodeGen/docker_compose/set_env.sh
@@ -4,7 +4,7 @@
 
 # SPDX-License-Identifier: Apache-2.0
 
-export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf"
+export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
diff --git a/CodeGen/kubernetes/intel/README.md b/CodeGen/kubernetes/intel/README.md
index be18003b83..0c47956a8c 100644
--- a/CodeGen/kubernetes/intel/README.md
+++ b/CodeGen/kubernetes/intel/README.md
@@ -12,9 +12,9 @@
 
 ## Deploy On Xeon
 
 ```
-cd GenAIExamples/CodeGen/kubernetes/intel/cpu/xeon/manifests
+cd GenAIExamples/CodeGen/kubernetes/intel/cpu/xeon/manifest
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-export MODEL_ID="meta-llama/CodeLlama-7b-hf"
+export MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
-sed -i "s/meta-llama\/CodeLlama-7b-hf/${MODEL_ID}/g" codegen.yaml
+sed -i "s|Qwen/Qwen2.5-Coder-7B-Instruct|${MODEL_ID}|g" codegen.yaml
 kubectl apply -f codegen.yaml
@@ -23,7 +23,7 @@ kubectl apply -f codegen.yaml
 
 ## Deploy On Gaudi
 
 ```
-cd GenAIExamples/CodeGen/kubernetes/intel/hpu/gaudi/manifests
+cd GenAIExamples/CodeGen/kubernetes/intel/hpu/gaudi/manifest
 export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
 sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml
 kubectl apply -f codegen.yaml
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml
index dd1675ce3c..8dd3c2b574 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml
@@ -29,6 +29,6 @@ spec:
       internalService:
         serviceName: tgi-service
         config:
-          MODEL_ID: meta-llama/CodeLlama-7b-hf
+          MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct
           endpoint: /generate
         isDownstreamService: true
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md b/CodeGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md
index c9d2295bef..c9911ee7de 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md
@@ -17,7 +17,7 @@ Before deploying the react-codegen.yaml file, ensure that you have the following
 ```
 # You may set the HUGGINGFACEHUB_API_TOKEN via method: export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
-  cd GenAIExamples/CodeGen/kubernetes/intel/cpu/xeon/manifests/ui/
+  cd GenAIExamples/CodeGen/kubernetes/intel/cpu/xeon/manifest/ui/
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" react-codegen.yaml ``` b. Set the proxies based on your network configuration diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml index 96cc682660..d0070dc969 100644 --- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml +++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "meta-llama/CodeLlama-7b-hf" + MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" @@ -404,7 +404,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml index 5d77fb8cc8..a155af13a0 100644 --- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml +++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml @@ -126,7 +126,7 @@ spec: - name: no_proxy value: securityContext: {} - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml index 2e37820577..d9a927e5c4 100644 --- a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml +++ b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-gaudi-svc config: - MODEL_ID: meta-llama/CodeLlama-7b-hf + MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct endpoint: /generate isDownstreamService: true diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml index c4a43a7c3c..dc032cd25c 100644 --- a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml +++ b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "meta-llama/CodeLlama-7b-hf" + MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" @@ -405,7 +405,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index ec1658314a..f90e0aaa46 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="codegen codegen-ui llm-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 
0821cd3cb6..b184c00f31 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="codegen codegen-ui llm-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu docker images && sleep 1s } diff --git a/CodeGen/ui/docker/Dockerfile b/CodeGen/ui/docker/Dockerfile index ac2bb7da31..1d5115f4b5 100644 --- a/CodeGen/ui/docker/Dockerfile +++ b/CodeGen/ui/docker/Dockerfile @@ -23,4 +23,4 @@ RUN npm run build EXPOSE 5173 # Run the front-end application in preview mode -CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] \ No newline at end of file +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/CodeGen/ui/docker/Dockerfile.react b/CodeGen/ui/docker/Dockerfile.react index 49bc13124c..18afc393ad 100644 --- a/CodeGen/ui/docker/Dockerfile.react +++ b/CodeGen/ui/docker/Dockerfile.react @@ -18,4 +18,4 @@ COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html COPY ./react/env.sh /docker-entrypoint.d/env.sh COPY ./react/nginx.conf /etc/nginx/conf.d/default.conf -RUN chmod +x /docker-entrypoint.d/env.sh \ No newline at end of file +RUN chmod +x /docker-entrypoint.d/env.sh diff --git a/CodeGen/ui/react/.env b/CodeGen/ui/react/.env index c5a7e3cad8..3ce78a405d 100644 --- a/CodeGen/ui/react/.env +++ b/CodeGen/ui/react/.env @@ -1 +1 @@ -VITE_CODE_GEN_URL=http://ip_address:7778/v1/codegen \ No newline at end of file +VITE_CODE_GEN_URL=http://ip_address:7778/v1/codegen diff --git a/CodeGen/ui/react/.env.production b/CodeGen/ui/react/.env.production index d3851cd494..8e99e67f9f 100644 --- a/CodeGen/ui/react/.env.production +++ b/CodeGen/ui/react/.env.production @@ -1 +1 @@ -VITE_CODE_GEN_URL=APP_CODE_GEN_URL \ No newline at end of file +VITE_CODE_GEN_URL=APP_CODE_GEN_URL diff --git a/CodeGen/ui/react/nginx.conf b/CodeGen/ui/react/nginx.conf index 00433fcda7..01aef12751 100644 --- a/CodeGen/ui/react/nginx.conf +++ b/CodeGen/ui/react/nginx.conf @@ -17,4 +17,4 @@ server { expires 1d; } } -} \ No newline at end of file +} diff --git a/CodeGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx b/CodeGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx index 479034cece..a21f7acc59 100644 --- a/CodeGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx +++ b/CodeGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx @@ -49,4 +49,4 @@ const CodeRender = ({ cleanCode, language, inline }:CodeRenderProps) => { } -export default CodeRender; \ No newline at end of file +export default CodeRender; diff --git a/CodeGen/ui/react/src/components/Shared/Markdown/Markdown.tsx b/CodeGen/ui/react/src/components/Shared/Markdown/Markdown.tsx index 6331c6d08d..2726e14b2e 100644 --- a/CodeGen/ui/react/src/components/Shared/Markdown/Markdown.tsx +++ b/CodeGen/ui/react/src/components/Shared/Markdown/Markdown.tsx @@ -59,4 +59,4 @@ const Markdown = ({ content }: MarkdownProps) => { />) } -export default Markdown; \ No newline at end of file +export default Markdown; diff --git a/CodeGen/ui/svelte/.prettierrc b/CodeGen/ui/svelte/.prettierrc index 3b2006102e..d146ee2b24 100644 --- a/CodeGen/ui/svelte/.prettierrc +++ b/CodeGen/ui/svelte/.prettierrc @@ -10,4 +10,4 @@ } } ] -} \ No newline at end of file +} diff --git 
a/CodeGen/ui/svelte/src/app.postcss b/CodeGen/ui/svelte/src/app.postcss
index fa24380883..ae1c1623f5 100644
--- a/CodeGen/ui/svelte/src/app.postcss
+++ b/CodeGen/ui/svelte/src/app.postcss
@@ -113,4 +113,4 @@ a.btn {
 
 .w-12\/12 {
   width: 100%
-}
\ No newline at end of file
+}
diff --git a/CodeTrans/Dockerfile b/CodeTrans/Dockerfile
index 89bb0b238d..918d936c96 100644
--- a/CodeTrans/Dockerfile
+++ b/CodeTrans/Dockerfile
@@ -18,7 +18,7 @@ WORKDIR /home/user/
 RUN git clone https://github.com/opea-project/GenAIComps.git
 
 WORKDIR /home/user/GenAIComps
-RUN pip install --no-cache-dir --upgrade pip && \
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
     pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
 
 COPY ./code_translation.py /home/user/code_translation.py
diff --git a/CodeTrans/README.md b/CodeTrans/README.md
index b70666273f..5cfa9b27e0 100644
--- a/CodeTrans/README.md
+++ b/CodeTrans/README.md
@@ -77,9 +77,9 @@ Currently we support two ways of deploying Code Translation services on docker:
 
 By default, the LLM model is set to a default value as listed below:
 
-| Service | Model                         |
-| ------- | ----------------------------- |
-| LLM     | HuggingFaceH4/mistral-7b-grok |
+| Service | Model                              |
+| ------- | ---------------------------------- |
+| LLM     | mistralai/Mistral-7B-Instruct-v0.3 |
 
 Change the `LLM_MODEL_ID` in `docker_compose/set_env.sh` for your needs.
diff --git a/CodeTrans/benchmark/performance/README.md b/CodeTrans/benchmark/performance/README.md
new file mode 100644
index 0000000000..4b519de980
--- /dev/null
+++ b/CodeTrans/benchmark/performance/README.md
@@ -0,0 +1,87 @@
+# CodeTrans Benchmarking
+
+This folder contains a collection of scripts to enable inference benchmarking by leveraging a comprehensive benchmarking tool, [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), that enables throughput analysis to assess inference performance.
+
+By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
+
+## Purpose
+
+We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
+
+- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
+- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
+- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks, etc.
+
+## Metrics
+
+The benchmark reports the following metrics:
+
+- Number of Concurrent Requests
+- End-to-End Latency: P50, P90, P99 (in milliseconds)
+- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
+- Average Next Token Latency (in milliseconds)
+- Average Token Latency (in milliseconds)
+- Requests Per Second (RPS)
+- Output Tokens Per Second
+- Input Tokens Per Second
+
+Results are displayed in the terminal and saved as a CSV file named `1_testspec.yaml`.
+
+## Getting Started
+
+We recommend using Kubernetes to deploy the CodeTrans service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs.
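+
+For reference, here is how the two deployment types map onto invocations of the `benchmark.sh` script described under Test Steps below. This is a minimal sketch based on the script's flags; the docker-mode IP address is a placeholder for your own host:
+
+```bash
+# Kubernetes deployment: only the test node count is needed
+bash benchmark.sh -d k8s -n 2
+
+# Docker deployment: supply your CodeTrans service IP (placeholder below) and port
+bash benchmark.sh -d docker -i 192.168.1.10 -p 7777
+```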
+ +### Prerequisites + +- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md). + +- Every node has direct internet access +- Set up kubectl on the master node with access to the Kubernetes cluster. +- Install Python 3.8+ on the master node for running GenAIEval. +- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods. +- Ensure that the container's ulimit can meet the the number of requests. + +```bash +# The way to modify the containered ulimit: +sudo systemctl edit containerd +# Add two lines: +[Service] +LimitNOFILE=65536:1048576 + +sudo systemctl daemon-reload; sudo systemctl restart containerd +``` + +### Test Steps + +Please deploy CodeTrans service before benchmarking. + +#### Run Benchmark Test + +Before the benchmark, we can configure the number of test queries and test output directory by: + +```bash +export USER_QUERIES="[1, 1, 1, 1]" +export TEST_OUTPUT_DIR="/tmp/benchmark_output" +``` + +And then run the benchmark by: + +```bash +bash benchmark.sh -n +``` + +The argument `-n` refers to the number of test nodes. + +#### Data collection + +All the test results will come to this folder `/tmp/benchmark_output` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps. diff --git a/CodeTrans/benchmark/performance/benchmark.sh b/CodeTrans/benchmark/performance/benchmark.sh new file mode 100644 index 0000000000..6eac50baf8 --- /dev/null +++ b/CodeTrans/benchmark/performance/benchmark.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +deployment_type="k8s" +node_number=1 +service_port=7777 +query_per_node=128 + +benchmark_tool_path="$(pwd)/GenAIEval" + +usage() { + echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]" + echo " -d deployment_type deployment type, select between k8s and docker (default: ${deployment_type})" + echo " -n node_number Test node number, required only for k8s deployment_type, (default: ${node_number})" + echo " -i service_ip service ip, required only for docker deployment_type" + echo " -p service_port service port, required only for docker deployment_type, (default: ${service_port})" + exit 1 +} + +while getopts ":d:n:i:p:" opt; do + case ${opt} in + d ) + deployment_type=$OPTARG + ;; + n ) + node_number=$OPTARG + ;; + i ) + service_ip=$OPTARG + ;; + p ) + service_port=$OPTARG + ;; + \? ) + echo "Invalid option: -$OPTARG" 1>&2 + usage + ;; + : ) + echo "Invalid option: -$OPTARG requires an argument" 1>&2 + usage + ;; + esac +done + +if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then + echo "Error: service_ip is required for docker deployment_type" 1>&2 + usage +fi + +if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then + echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2 +fi + +function main() { + if [[ ! -d ${benchmark_tool_path} ]]; then + echo "Benchmark tool not found, setting up..." 
+ setup_env + fi + run_benchmark +} + +function setup_env() { + git clone https://github.com/opea-project/GenAIEval.git + pushd ${benchmark_tool_path} + python3 -m venv stress_venv + source stress_venv/bin/activate + pip install -r requirements.txt + popd +} + +function run_benchmark() { + source ${benchmark_tool_path}/stress_venv/bin/activate + export DEPLOYMENT_TYPE=${deployment_type} + export SERVICE_IP=${service_ip:-"None"} + export SERVICE_PORT=${service_port:-"None"} + if [[ -z $USER_QUERIES ]]; then + user_query=$((query_per_node*node_number)) + export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]" + echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}." + fi + export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//') + if [[ -z $WARMUP ]]; then export WARMUP=0; fi + if [[ -z $TEST_OUTPUT_DIR ]]; then + if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then + export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}" + else + export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker" + fi + echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}." + fi + + envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml + cd ${benchmark_tool_path}/evals/benchmark + python benchmark.py +} + +main diff --git a/CodeTrans/benchmark/performance/benchmark.yaml b/CodeTrans/benchmark/performance/benchmark.yaml new file mode 100644 index 0000000000..8680e886de --- /dev/null +++ b/CodeTrans/benchmark/performance/benchmark.yaml @@ -0,0 +1,47 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +test_suite_config: # Overall configuration settings for the test suite + examples: ["codetrans"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna + deployment_type: "k8s" # Default is "k8s", can also be "docker" + service_ip: None # Leave as None for k8s, specify for Docker + service_port: None # Leave as None for k8s, specify for Docker + warm_ups: 0 # Number of test requests for warm-up + run_time: 60m # The max total run time for the test suite + seed: # The seed for all RNGs + user_queries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] # Number of test requests at each concurrency level + query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default. + random_prompt: false # Use random prompts if true, fixed prompts if false + collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false + data_visualization: false # Generate data visualization if true, do not generate data visualization if false + llm_model: "mistralai/Mistral-7B-Instruct-v0.3" # The LLM model used for the test + test_output_dir: "/home/sdp/benchmark_output" # The directory to store the test output + load_shape: # Tenant concurrency pattern + name: constant # poisson or constant (locust default load shape) + params: # Loadshape-specific parameters + constant: # Constant load shape specific parameters, activate only if load_shape.name is constant + concurrent_level: 4 # If user_queries is specified, concurrent_level is the target number of requests per user. If not, it is the number of simulated users + # arrival_rate: 1.0 # Request arrival rate.
If set, concurrent_level will be overridden and constant load will be generated based on arrival_rate + poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson + arrival_rate: 1.0 # Request arrival rate + namespace: "" # Fill in a user-defined namespace; otherwise, the default namespace is used. + +test_cases: + codetrans: + llm: + run_test: true + service_name: "llm-svc" # Replace with your service name + parameters: + model_name: "mistralai/Mistral-7B-Instruct-v0.3" + max_new_tokens: 128 + temperature: 0.01 + top_k: 10 + top_p: 0.95 + repetition_penalty: 1.03 + streaming: true + llmserve: + run_test: true + service_name: "codetrans-llm-svc" # Replace with your service name + e2e: + run_test: true + service_name: "codetrans-backend-server-svc" # Replace with your service name diff --git a/CodeTrans/codetrans.yaml b/CodeTrans/codetrans.yaml index 9d7f70b4ef..c362599788 100644 --- a/CodeTrans/codetrans.yaml +++ b/CodeTrans/codetrans.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/README.md b/CodeTrans/docker_compose/intel/cpu/xeon/README.md index fd29ce2103..15f6414f04 100755 --- a/CodeTrans/docker_compose/intel/cpu/xeon/README.md +++ b/CodeTrans/docker_compose/intel/cpu/xeon/README.md @@ -57,9 +57,9 @@ Then run the command `docker images`, you will have the following Docker Images: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------- | ----------------------------- | -| LLM | HuggingFaceH4/mistral-7b-grok | +| Service | Model | +| ------- | ---------------------------------- | +| LLM | mistralai/Mistral-7B-Instruct-v0.3 | Change the `LLM_MODEL_ID` below for your needs. diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml index 122028b56e..16c05cf363 100644 --- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: codetrans-tgi-service ports: - "8008:80" diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md index 1eb1812f22..04858bc235 100755 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md @@ -49,9 +49,9 @@ Then run the command `docker images`, you will have the following Docker Images: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------- | ----------------------------- | -| LLM | HuggingFaceH4/mistral-7b-grok | +| Service | Model | +| ------- | ---------------------------------- | +| LLM | mistralai/Mistral-7B-Instruct-v0.3 | Change the `LLM_MODEL_ID` below for your needs.
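For example, a minimal override before starting the stack (a sketch only; `mistralai/Mistral-7B-Instruct-v0.3` is the documented default, and any TGI-compatible model id can be substituted):

```bash
# Override the served model for this shell, then (re)create the containers.
export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
docker compose up -d
```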
diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml index 09b82ed3f6..2f87d10c24 100644 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: codetrans-tgi-service ports: - "8008:80" diff --git a/CodeTrans/docker_compose/set_env.sh b/CodeTrans/docker_compose/set_env.sh index 5eae8f0cda..b4defd88c5 100644 --- a/CodeTrans/docker_compose/set_env.sh +++ b/CodeTrans/docker_compose/set_env.sh @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 -export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} diff --git a/CodeTrans/kubernetes/intel/README.md b/CodeTrans/kubernetes/intel/README.md index 9d6e63f8be..2f778a79b9 100644 --- a/CodeTrans/kubernetes/intel/README.md +++ b/CodeTrans/kubernetes/intel/README.md @@ -14,14 +14,14 @@ By default, the LLM model is set to a default value as listed below: |Service |Model | |---------|-------------------------| -|LLM |HuggingFaceH4/mistral-7b-grok| +|LLM |mistralai/Mistral-7B-Instruct-v0.3| Change the `MODEL_ID` in `codetrans.yaml` for your needs. ## Deploy On Xeon ```bash -cd GenAIExamples/CodeTrans/kubernetes/intel/cpu/xeon/manifests +cd GenAIExamples/CodeTrans/kubernetes/intel/cpu/xeon/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codetrans.yaml kubectl apply -f codetrans.yaml @@ -30,7 +30,7 @@ kubectl apply -f codetrans.yaml ## Deploy On Gaudi ```bash -cd GenAIExamples/CodeTrans/kubernetes/intel/hpu/gaudi/manifests +cd GenAIExamples/CodeTrans/kubernetes/intel/hpu/gaudi/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codetrans.yaml kubectl apply -f codetrans.yaml diff --git a/CodeTrans/kubernetes/intel/README_gmc.md b/CodeTrans/kubernetes/intel/README_gmc.md index 1b932f4ea2..0f66407d16 100644 --- a/CodeTrans/kubernetes/intel/README_gmc.md +++ b/CodeTrans/kubernetes/intel/README_gmc.md @@ -13,7 +13,7 @@ By default, the LLM model is set to a default value as listed below: |Service |Model | |---------|-------------------------| -|LLM |HuggingFaceH4/mistral-7b-grok| +|LLM |mistralai/Mistral-7B-Instruct-v0.3| Change the `MODEL_ID` in `codetrans_xeon.yaml` for your needs. 
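For instance, a quick in-place edit before applying the CR (the `sed` pattern is illustrative, and the model id shown is the documented default; this assumes the CR is then applied with `kubectl` as elsewhere in this guide):

```bash
# Swap the model id in the GMC custom resource, then apply it.
sed -i 's|MODEL_ID: .*|MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3|' codetrans_xeon.yaml
kubectl apply -f codetrans_xeon.yaml
```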
diff --git a/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml b/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml index 889a1d21a6..244e7eb54a 100644 --- a/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml +++ b/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-service config: - MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3 endpoint: /generate isDownstreamService: true diff --git a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml index a68768e2f8..a778a8529e 100644 --- a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml +++ b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "HuggingFaceH4/mistral-7b-grok" + MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" @@ -404,7 +404,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml b/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml index 5bc1bd5e2d..b61ffef3ec 100644 --- a/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml +++ b/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-gaudi-svc config: - MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3 endpoint: /generate isDownstreamService: true diff --git a/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml index 541f311799..a2efecf44b 100644 --- a/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml +++ b/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "HuggingFaceH4/mistral-7b-grok" + MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" @@ -405,7 +405,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/CodeTrans/tests/test_compose_on_gaudi.sh b/CodeTrans/tests/test_compose_on_gaudi.sh index b246f4dc91..c6e8b8c9bd 100644 --- a/CodeTrans/tests/test_compose_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="codetrans codetrans-ui llm-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } @@ -31,7 +31,7 @@ function start_services() { export http_proxy=${http_proxy} export https_proxy=${http_proxy} - export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" 
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} diff --git a/CodeTrans/tests/test_compose_on_xeon.sh b/CodeTrans/tests/test_compose_on_xeon.sh index 8cbcb23208..63fe74f058 100644 --- a/CodeTrans/tests/test_compose_on_xeon.sh +++ b/CodeTrans/tests/test_compose_on_xeon.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="codetrans codetrans-ui llm-tgi nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu docker images && sleep 1s } @@ -30,7 +30,7 @@ function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export http_proxy=${http_proxy} export https_proxy=${http_proxy} - export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} diff --git a/CodeTrans/ui/docker/Dockerfile b/CodeTrans/ui/docker/Dockerfile index ac2bb7da31..1d5115f4b5 100644 --- a/CodeTrans/ui/docker/Dockerfile +++ b/CodeTrans/ui/docker/Dockerfile @@ -23,4 +23,4 @@ RUN npm run build EXPOSE 5173 # Run the front-end application in preview mode -CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] \ No newline at end of file +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/DBQnA/README.md b/DBQnA/README.md index 1164d50fbf..063475c181 100644 --- a/DBQnA/README.md +++ b/DBQnA/README.md @@ -4,6 +4,48 @@ Experience a revolutionary way to interact with your database using our DBQnA ap --- +```mermaid +flowchart LR + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.7 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.7 + classDef orchid fill:#DA70D6,stroke:#1E90FF,stroke-width:2px,fill-opacity:0.7 + classDef invisible fill:transparent,stroke:transparent; + style Text2SQL-MegaService stroke:#000000 + + %% Subgraphs %% + subgraph Text2SQL-MegaService["Text-to-SQL MegaService "] + direction LR + LLM([LLM MicroService]):::invisible + end + subgraph UserInterface[" User Interface "] + direction LR + a([User Input Query]):::orchid + UI([UI server
]):::orchid + end + + LLM_gen{{LLM Service
}} + POSTGRES_DB{{POSTGRES DATABASE
}} + GW([Text-to-SQL Gateway
]):::orange + + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> Text2SQL-MegaService + + + %% Text-to-SQL service flow + direction TB + LLM <-.-> POSTGRES_DB + direction LR + LLM <-.-> LLM_gen + +``` + +--- + ## 🛠️ Key Features ### 💬 SQL Query Generation diff --git a/DBQnA/ui/react/nginx.conf b/DBQnA/ui/react/nginx.conf index 00433fcda7..01aef12751 100644 --- a/DBQnA/ui/react/nginx.conf +++ b/DBQnA/ui/react/nginx.conf @@ -17,4 +17,4 @@ server { expires 1d; } } -} \ No newline at end of file +} diff --git a/DBQnA/ui/react/src/logo.svg b/DBQnA/ui/react/src/logo.svg index 9dfc1c058c..7169476033 100644 --- a/DBQnA/ui/react/src/logo.svg +++ b/DBQnA/ui/react/src/logo.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/DBQnA/ui/react/src/main.tsx b/DBQnA/ui/react/src/main.tsx index a6695d54e1..7f3e441540 100644 --- a/DBQnA/ui/react/src/main.tsx +++ b/DBQnA/ui/react/src/main.tsx @@ -10,4 +10,4 @@ ReactDOM.createRoot(document.getElementById("root")!).render( -) \ No newline at end of file +) diff --git a/DocIndexRetriever/Dockerfile b/DocIndexRetriever/Dockerfile index 4e738a22a9..c8794f3efc 100644 --- a/DocIndexRetriever/Dockerfile +++ b/DocIndexRetriever/Dockerfile @@ -16,7 +16,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt COPY ./retrieval_tool.py /home/user/retrieval_tool.py diff --git a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml index 1d0a445050..fc8accadcf 100644 --- a/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml @@ -28,7 +28,7 @@ services: TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} tei-embedding-service: - image: ghcr.io/huggingface/tei-gaudi:latest + image: ghcr.io/huggingface/tei-gaudi:1.5.0 container_name: tei-embedding-gaudi-server ports: - "8090:80" diff --git a/DocIndexRetriever/tests/test_compose_on_gaudi.sh b/DocIndexRetriever/tests/test_compose_on_gaudi.sh index 8779944be4..e652ead26b 100644 --- a/DocIndexRetriever/tests/test_compose_on_gaudi.sh +++ b/DocIndexRetriever/tests/test_compose_on_gaudi.sh @@ -24,7 +24,7 @@ function build_docker_images() { docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log docker pull redis/redis-stack:7.2.0-v9 - docker pull ghcr.io/huggingface/tei-gaudi:latest + docker pull ghcr.io/huggingface/tei-gaudi:1.5.0 docker images && sleep 1s } diff --git a/DocSum/Dockerfile b/DocSum/Dockerfile index 5ffd463217..d0dac691c8 100644 --- a/DocSum/Dockerfile +++ b/DocSum/Dockerfile @@ -18,7 +18,7 @@ WORKDIR /home/user RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt COPY ./docsum.py /home/user/docsum.py diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index 620ee36575..35e673563b 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - 
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-service ports: - "8008:80" diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index 3480750db7..6882f0ebae 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally. This step can be ignored As TGI Gaudi has been officially published as a Docker image, we simply need to pull it: ```bash -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 +docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 ``` ### 2. Build LLM Image @@ -28,7 +28,7 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op ```bash git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/DocSum/docker +cd GenAIExamples/DocSum docker build -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . ``` @@ -53,7 +53,7 @@ docker build -t opea/docsum-react-ui:latest --build-arg BACKEND_SERVICE_ENDPOINT Then run the command `docker images`, you will have the following Docker Images: -1. `ghcr.io/huggingface/tgi-gaudi:2.0.5` +1. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 2. `opea/llm-docsum-tgi:latest` 3. `opea/docsum:latest` 4. `opea/docsum-ui:latest` diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index e9f3a96f85..71c52b40ae 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,11 +3,12 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8008:80" environment: + HABANA_VISIBLE_DEVICES: all no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} diff --git a/DocSum/docsum.yaml b/DocSum/docsum.yaml index bc87bc5b46..9e9936ff49 100644 --- a/DocSum/docsum.yaml +++ b/DocSum/docsum.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/DocSum/kubernetes/intel/README.md b/DocSum/kubernetes/intel/README.md index dc81ee35ee..b3c797020e 100644 --- a/DocSum/kubernetes/intel/README.md +++ b/DocSum/kubernetes/intel/README.md @@ -11,7 +11,7 @@ ## Deploy On Xeon ``` -cd GenAIExamples/DocSum/kubernetes/intel/cpu/xeon/manifests +cd GenAIExamples/DocSum/kubernetes/intel/cpu/xeon/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" docsum.yaml kubectl apply -f docsum.yaml @@ -20,7 +20,7 @@ kubectl apply -f docsum.yaml ## Deploy On Gaudi ``` -cd GenAIExamples/DocSum/kubernetes/intel/hpu/gaudi/manifests +cd GenAIExamples/DocSum/kubernetes/intel/hpu/gaudi/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" docsum.yaml kubectl apply -f docsum.yaml diff --git a/DocSum/kubernetes/intel/README_gmc.md b/DocSum/kubernetes/intel/README_gmc.md index b332292110..00e9d8e1bc 100644 --- a/DocSum/kubernetes/intel/README_gmc.md +++ b/DocSum/kubernetes/intel/README_gmc.md @@ -8,8 +8,8 @@ Install GMC in your Kubernetes cluster, 
if you have not already done so, by foll The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm. The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the -the image `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the -service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`. +image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the +service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.6`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the example below we use `Intel/neural-chat-7b-v3-3`. [NOTE] Refer to [Docker Xeon README](https://github.com/opea-project/GenAIExamples/blob/main/DocSum/docker_compose/intel/cpu/xeon/README.md) or diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml index 1416bdbcbc..9199888a10 100644 --- a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml +++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml @@ -404,7 +404,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/README.md b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/README.md index de7419bc90..7a4f74e848 100644 --- a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/README.md +++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/README.md @@ -16,7 +16,7 @@ Before deploying the react-docsum.yaml file, ensure that you have the following ``` # You may set the HUGGINGFACEHUB_API_TOKEN via method: export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" - cd GenAIExamples/DocSum/kubernetes/intel/cpu/xeon/manifests/ui/ + cd GenAIExamples/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/ sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" react-docsum.yaml ``` b.
Set the proxies based on your network configuration diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml index 61e8799b0e..560e34a215 100644 --- a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml +++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml @@ -126,7 +126,7 @@ spec: - name: no_proxy value: securityContext: {} - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml b/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml index 5c10f3c761..7ab1df9b1e 100644 --- a/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml +++ b/DocSum/kubernetes/intel/hpu/gaudi/manifest/docsum.yaml @@ -405,7 +405,7 @@ spec: runAsUser: 1000 seccompProfile: type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.5" + image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index 9c9ca92a03..12a6a8861b 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="docsum docsum-ui llm-docsum-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } @@ -169,7 +169,7 @@ function main() { validate_microservices validate_megaservice - #validate_frontend + validate_frontend stop_docker echo y | docker system prune diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh index fbd6797f58..7f0b2f8c53 100644 --- a/DocSum/tests/test_compose_on_xeon.sh +++ b/DocSum/tests/test_compose_on_xeon.sh @@ -168,7 +168,7 @@ function main() { validate_microservices validate_megaservice - #validate_frontend + validate_frontend stop_docker echo y | docker system prune diff --git a/DocSum/ui/docker/Dockerfile b/DocSum/ui/docker/Dockerfile index ac2bb7da31..1d5115f4b5 100644 --- a/DocSum/ui/docker/Dockerfile +++ b/DocSum/ui/docker/Dockerfile @@ -23,4 +23,4 @@ RUN npm run build EXPOSE 5173 # Run the front-end application in preview mode -CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] \ No newline at end of file +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/DocSum/ui/docker/Dockerfile.react b/DocSum/ui/docker/Dockerfile.react index aa8f3fe78e..9458864658 100644 --- a/DocSum/ui/docker/Dockerfile.react +++ b/DocSum/ui/docker/Dockerfile.react @@ -21,4 +21,4 @@ EXPOSE 80 COPY --from=vite-app /usr/app/react/nginx.conf /etc/nginx/conf.d/default.conf COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html -ENTRYPOINT ["nginx", "-g", "daemon off;"] \ No newline at end of file +ENTRYPOINT ["nginx", "-g", "daemon off;"] diff --git a/DocSum/ui/react/.env b/DocSum/ui/react/.env index 88e4996a29..b25495154a 100644 --- a/DocSum/ui/react/.env +++ b/DocSum/ui/react/.env @@ -1 +1 @@ -VITE_DOC_SUM_URL=http://backend_address:8888/v1/docsum \ No newline at end of file +VITE_DOC_SUM_URL=http://backend_address:8888/v1/docsum diff --git a/DocSum/ui/react/nginx.conf b/DocSum/ui/react/nginx.conf 
index 00433fcda7..01aef12751 100644 --- a/DocSum/ui/react/nginx.conf +++ b/DocSum/ui/react/nginx.conf @@ -17,4 +17,4 @@ server { expires 1d; } } -} \ No newline at end of file +} diff --git a/DocSum/ui/react/src/components/DocSum/DocSum.tsx b/DocSum/ui/react/src/components/DocSum/DocSum.tsx index 9e7472c658..2fa9fd4a34 100644 --- a/DocSum/ui/react/src/components/DocSum/DocSum.tsx +++ b/DocSum/ui/react/src/components/DocSum/DocSum.tsx @@ -150,4 +150,4 @@ const DocSum = () => { ) } -export default DocSum \ No newline at end of file +export default DocSum diff --git a/DocSum/ui/react/src/components/DocSum/FileUpload.tsx b/DocSum/ui/react/src/components/DocSum/FileUpload.tsx index baa77670ca..1790cfb161 100644 --- a/DocSum/ui/react/src/components/DocSum/FileUpload.tsx +++ b/DocSum/ui/react/src/components/DocSum/FileUpload.tsx @@ -64,4 +64,4 @@ export function FileUpload(props: Partial) { ); -} \ No newline at end of file +} diff --git a/DocSum/ui/react/src/components/Shared/CodeRender/CodeRender.tsx b/DocSum/ui/react/src/components/Shared/CodeRender/CodeRender.tsx index 479034cece..a21f7acc59 100644 --- a/DocSum/ui/react/src/components/Shared/CodeRender/CodeRender.tsx +++ b/DocSum/ui/react/src/components/Shared/CodeRender/CodeRender.tsx @@ -49,4 +49,4 @@ const CodeRender = ({ cleanCode, language, inline }:CodeRenderProps) => { } -export default CodeRender; \ No newline at end of file +export default CodeRender; diff --git a/DocSum/ui/react/src/components/Shared/Markdown/Markdown.tsx b/DocSum/ui/react/src/components/Shared/Markdown/Markdown.tsx index dc4b2d3083..77471cd190 100644 --- a/DocSum/ui/react/src/components/Shared/Markdown/Markdown.tsx +++ b/DocSum/ui/react/src/components/Shared/Markdown/Markdown.tsx @@ -55,4 +55,4 @@ const Markdown = ({ content }: MarkdownProps) => { />) } -export default Markdown; \ No newline at end of file +export default Markdown; diff --git a/DocSum/ui/svelte/src/lib/shared/Network.ts b/DocSum/ui/svelte/src/lib/shared/Network.ts index 172b25a441..705019c897 100644 --- a/DocSum/ui/svelte/src/lib/shared/Network.ts +++ b/DocSum/ui/svelte/src/lib/shared/Network.ts @@ -20,7 +20,12 @@ export async function fetchTextStream(query: string | Blob, params: string, file const url = `${DOC_BASE_URL}`; // Ensure the URL is constructed correctly const formData = new FormData(); - if (params === "doc_id" && file) { + if (!file) { + file = new Blob([""], { type: "text/plain" }); + fileName = "empty.txt"; + } + + if (params === "doc_id") { formData.append("files", file, fileName); formData.append("messages", query); } else if (params === "text") { diff --git a/EdgeCraftRAG/Dockerfile b/EdgeCraftRAG/Dockerfile new file mode 100644 index 0000000000..3c9711deaf --- /dev/null +++ b/EdgeCraftRAG/Dockerfile @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./edgecraftrag /home/user/edgecraftrag +COPY ./chatqna.py /home/user/chatqna.py + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user + +USER user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "chatqna.py"] diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server new file mode 100644 index 
0000000000..c04dc0a545 --- /dev/null +++ b/EdgeCraftRAG/Dockerfile.server @@ -0,0 +1,35 @@ +FROM python:3.11-slim + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN apt-get update && apt-get install -y gnupg wget +RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg +RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ + tee /etc/apt/sources.list.d/intel-gpu-jammy.list +RUN apt-get update +RUN apt-get install -y \ + intel-opencl-icd intel-level-zero-gpu level-zero intel-level-zero-gpu-raytracing \ + intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ + libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ + libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ + mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./edgecraftrag /home/user/edgecraftrag + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user/ + +USER user + +ENTRYPOINT ["python", "-m", "edgecraftrag.server"] diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md new file mode 100644 index 0000000000..da8d2efb07 --- /dev/null +++ b/EdgeCraftRAG/README.md @@ -0,0 +1,274 @@ +# Edge Craft Retrieval-Augmented Generation + +Edge Craft RAG (EC-RAG) is a customizable, tunable, and production-ready +Retrieval-Augmented Generation system for edge solutions. It is designed to +curate the RAG pipeline to meet hardware requirements at the edge with guaranteed +quality and performance. + +## Quick Start Guide + +### Run Containers with Docker Compose + +```bash +cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc + +export MODEL_PATH="your model path for all your models" +export DOC_PATH="your doc path for uploading a dir of files" +export HOST_IP="your host ip" +export UI_SERVICE_PORT="port for UI service" + +# Optional for vllm endpoint +export vLLM_ENDPOINT="http://${HOST_IP}:8008" + +# If you have a proxy configured, uncomment the line below +# export no_proxy=$no_proxy,${HOST_IP},edgecraftrag,edgecraftrag-server +# If you have a HF mirror configured, it will be imported into the container +# export HF_ENDPOINT="your HF mirror endpoint" + +# The container ports are set by default; uncomment to change them +# export MEGA_SERVICE_PORT=16011 +# export PIPELINE_SERVICE_PORT=16010 + +docker compose up -d +``` + +### (Optional) Build Docker Images for Mega Service, Server and UI on your own + +```bash +cd GenAIExamples/EdgeCraftRAG + +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag:latest -f Dockerfile . +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-server:latest -f Dockerfile.server . +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui .
+``` + +### ChatQnA with LLM Example (Command Line) + +```bash +cd GenAIExamples/EdgeCraftRAG + +# Activate pipeline test_pipeline_local_llm +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.' + +# You may need to wait several minutes +# Expected output: +# { +# "idx": "3214cf25-8dff-46e6-b7d1-1811f237cf8c", +# "name": "rag_test", +# "comp_type": "pipeline", +# "node_parser": { +# "idx": "ababed12-c192-4cbb-b27e-e49c76a751ca", +# "parser_type": "simple", +# "chunk_size": 400, +# "chunk_overlap": 48 +# }, +# "indexer": { +# "idx": "46969b63-8a32-4142-874d-d5c86ee9e228", +# "indexer_type": "faiss_vector", +# "model": { +# "idx": "7aae57c0-13a4-4a15-aecb-46c2ec8fe738", +# "type": "embedding", +# "model_id": "BAAI/bge-small-en-v1.5", +# "model_path": "/home/user/models/bge_ov_embedding", +# "device": "auto" +# } +# }, +# "retriever": { +# "idx": "3747fa59-ff9b-49b6-a8e8-03cdf8c979a4", +# "retriever_type": "vectorsimilarity", +# "retrieve_topk": 30 +# }, +# "postprocessor": [ +# { +# "idx": "d46a6cae-ba7a-412e-85b7-d334f175efaa", +# "postprocessor_type": "reranker", +# "model": { +# "idx": "374e7471-bd7d-41d0-b69d-a749a052b4b0", +# "type": "reranker", +# "model_id": "BAAI/bge-reranker-large", +# "model_path": "/home/user/models/bge_ov_reranker", +# "device": "auto" +# }, +# "top_n": 2 +# } +# ], +# "generator": { +# "idx": "52d8f112-6290-4dd3-bc28-f9bd5deeb7c8", +# "generator_type": "local", +# "model": { +# "idx": "fa0c11e1-46d1-4df8-a6d8-48cf6b99eff3", +# "type": "llm", +# "model_id": "qwen2-7b-instruct", +# "model_path": "/home/user/models/qwen2-7b-instruct/INT4_compressed_weights", +# "device": "auto" +# } +# }, +# "status": { +# "active": true +# } +# } + +# Prepare data from local directory +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"#REPLACE WITH YOUR LOCAL DOC DIR#"}' | jq '.' + +# Validate Mega Service +curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.' +``` + +### ChatQnA with LLM Example (UI) + +Open your browser and access http://${HOST_IP}:8082. + +> Your browser should be running on the same host as your console; otherwise, you will need to access the UI with your host's domain name instead of ${HOST_IP}. + +### (Optional) Launch vLLM with OpenVINO service + +```bash +# 1. export LLM_MODEL +export LLM_MODEL="your model id" +# 2. Uncomment the code below in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml' + # vllm-service: + # image: vllm:openvino + # container_name: vllm-openvino-server + # ports: + # - "8008:80" + # environment: + # no_proxy: ${no_proxy} + # http_proxy: ${http_proxy} + # https_proxy: ${https_proxy} + # vLLM_ENDPOINT: ${vLLM_ENDPOINT} + # LLM_MODEL: ${LLM_MODEL} + # entrypoint: /bin/bash -c "\ + # cd / && \ + # export VLLM_CPU_KVCACHE_SPACE=50 && \ + # python3 -m vllm.entrypoints.openai.api_server \ + # --model '${LLM_MODEL}' \ + # --host 0.0.0.0 \ + # --port 80" +``` + +## Advanced User Guide + +### Pipeline Management + +#### Create a pipeline + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline.json | jq '.' +``` + +It will take some time to prepare the embedding model.
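If you prefer an inline payload over `examples/test_pipeline.json`, the request below is a hypothetical minimal sketch assembled from the fields shown in the expected output above (retrieval-only, with no generator); treat the JSON files under `examples/` as the authoritative reference for your setup.

```bash
# Illustrative only: field names follow the pipeline API schema; values are examples.
curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines \
  -H "Content-Type: application/json" \
  -d '{
        "name": "test1",
        "node_parser": {"parser_type": "simple", "chunk_size": 400, "chunk_overlap": 48},
        "indexer": {
          "indexer_type": "faiss_vector",
          "embedding_model": {
            "model_id": "BAAI/bge-small-en-v1.5",
            "model_path": "/home/user/models/bge_ov_embedding",
            "device": "auto"
          }
        },
        "retriever": {"retriever_type": "vectorsimilarity", "retrieve_topk": 30},
        "active": true
      }' | jq '.'
```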
+ +#### Upload a text + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' +``` + +#### Provide a query to retrieve context with similarity search + +```bash +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d @examples/test_query.json | jq '.' +``` + +#### Create the second pipeline test2 + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline2.json | jq '.' +``` + +#### Check all pipelines + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.' +``` + +#### Compare similarity retrieval (test1) and keyword retrieval (test2) + +```bash +# Activate pipeline test1 +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test1 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' +# Similarity retrieval +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' + +# Activate pipeline test2 +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test2 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' +# Keyword retrieval +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' + +``` + +### Model Management + +#### Load a model + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d @examples/test_model_load.json | jq '.' +``` + +It will take some time to load the model. + +#### Check all models + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" | jq '.' +``` + +#### Update a model + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d @examples/test_model_update.json | jq '.' +``` + +#### Check a certain model + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' +``` + +#### Delete a model + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' +``` + +### File Management + +#### Add a text + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' +``` + +#### Add files from an existing file path + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_dir.json | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.' +``` + +#### Check all files + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files -H "Content-Type: application/json" | jq '.' +``` + +#### Check one file + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' +``` + +#### Delete a file + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' +``` + +#### Update a file + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.'
+``` diff --git a/EdgeCraftRAG/chatqna.py b/EdgeCraftRAG/chatqna.py new file mode 100644 index 0000000000..1afa9621ce --- /dev/null +++ b/EdgeCraftRAG/chatqna.py @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from comps import MicroService, ServiceOrchestrator, ServiceType + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "127.0.0.1") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 16011)) +PIPELINE_SERVICE_HOST_IP = os.getenv("PIPELINE_SERVICE_HOST_IP", "127.0.0.1") +PIPELINE_SERVICE_PORT = int(os.getenv("PIPELINE_SERVICE_PORT", 16010)) + +from comps import Gateway, MegaServiceEndpoint +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + ChatCompletionResponse, + ChatCompletionResponseChoice, + ChatMessage, + UsageInfo, +) +from fastapi import Request +from fastapi.responses import StreamingResponse + + +class EdgeCraftRagGateway(Gateway): + def __init__(self, megaservice, host="0.0.0.0", port=16011): + super().__init__( + megaservice, host, port, str(MegaServiceEndpoint.CHAT_QNA), ChatCompletionRequest, ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + input = await request.json() + result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=input) + for node, response in result_dict.items(): + if isinstance(response, StreamingResponse): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="edgecraftrag", choices=choices, usage=usage) + + +class EdgeCraftRagService: + def __init__(self, host="0.0.0.0", port=16010): + self.host = host + self.port = port + self.megaservice = ServiceOrchestrator() + + def add_remote_service(self): + edgecraftrag = MicroService( + name="pipeline", + host=PIPELINE_SERVICE_HOST_IP, + port=PIPELINE_SERVICE_PORT, + endpoint="/v1/chatqna", + use_remote_service=True, + service_type=ServiceType.UNDEFINED, + ) + self.megaservice.add(edgecraftrag) + self.gateway = EdgeCraftRagGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) + + +if __name__ == "__main__": + edgecraftrag = EdgeCraftRagService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) + edgecraftrag.add_remote_service() diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml new file mode 100644 index 0000000000..f877b7c582 --- /dev/null +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -0,0 +1,78 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + server: + image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} + container_name: edgecraftrag-server + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_ENDPOINT: ${HF_ENDPOINT} + vLLM_ENDPOINT: ${vLLM_ENDPOINT} + volumes: + - ${MODEL_PATH:-${PWD}}:/home/user/models + - ${DOC_PATH:-${PWD}}:/home/user/docs + ports: + - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} + devices: + - /dev/dri:/dev/dri + group_add: + - video + ecrag: + image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} + container_name: edgecraftrag + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: 
${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + ports: + - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} + depends_on: + - server + ui: + image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} + container_name: edgecraftrag-ui + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082} + UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} + ports: + - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} + restart: always + depends_on: + - server + - ecrag + # vllm-service: + # image: vllm:openvino + # container_name: vllm-openvino-server + # ports: + # - "8008:80" + # environment: + # no_proxy: ${no_proxy} + # http_proxy: ${http_proxy} + # https_proxy: ${https_proxy} + # vLLM_ENDPOINT: ${vLLM_ENDPOINT} + # LLM_MODEL: ${LLM_MODEL} + # entrypoint: /bin/bash -c "\ + # cd / && \ + # export VLLM_CPU_KVCACHE_SPACE=50 && \ + # python3 -m vllm.entrypoints.openai.api_server \ + # --model '${LLM_MODEL}' \ + # --host 0.0.0.0 \ + # --port 80" + +networks: + default: + driver: bridge diff --git a/EdgeCraftRAG/docker_image_build/build.yaml b/EdgeCraftRAG/docker_image_build/build.yaml new file mode 100644 index 0000000000..e0cc355cc6 --- /dev/null +++ b/EdgeCraftRAG/docker_image_build/build.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + server: + build: + context: .. + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./Dockerfile.server + image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} + ui: + build: + context: .. + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./ui/docker/Dockerfile.ui + image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} + ecrag: + build: + context: .. 
+ args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./Dockerfile + image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} diff --git a/EdgeCraftRAG/edgecraftrag/__init__.py b/EdgeCraftRAG/edgecraftrag/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/__init__.py b/EdgeCraftRAG/edgecraftrag/api/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py b/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py new file mode 100644 index 0000000000..dfd32c29e6 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.context import ctx +from fastapi import FastAPI + +chatqna_app = FastAPI() + + +# Retrieval +@chatqna_app.post(path="/v1/retrieval") +async def retrieval(request: ChatCompletionRequest): + nodeswithscore = ctx.get_pipeline_mgr().run_retrieve(chat_request=request) + print(nodeswithscore) + if nodeswithscore is not None: + ret = [] + for n in nodeswithscore: + ret.append((n.node.node_id, n.node.text, n.score)) + return ret + + return "Not found" + + +# ChatQnA +@chatqna_app.post(path="/v1/chatqna") +async def chatqna(request: ChatCompletionRequest): + ret = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + return str(ret) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py new file mode 100644 index 0000000000..fb5b327929 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -0,0 +1,102 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.api_schema import DataIn, FilesIn +from edgecraftrag.context import ctx +from fastapi import FastAPI + +data_app = FastAPI() + + +# Upload a text or files +@data_app.post(path="/v1/data") +async def add_data(request: DataIn): + nodelist = None + + docs = [] + if request.text is not None: + docs.extend(ctx.get_file_mgr().add_text(text=request.text)) + if request.local_path is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + # TODO: Need bug fix, when node_parser is None + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return "Done" + + +# Upload files by a list of file_path +@data_app.post(path="/v1/data/files") +async def add_files(request: FilesIn): + nodelist = None + + docs = [] + if request.local_paths is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_paths)) + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) + if nodelist is None: + return "Error" + pl = 
ctx.get_pipeline_mgr().get_active_pipeline() + # TODO: needs a bug fix for the case when node_parser is None + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return "Done" + + +# GET files +@data_app.get(path="/v1/data/files") +async def get_files(): + return ctx.get_file_mgr().get_files() + + +# GET a file +@data_app.get(path="/v1/data/files/{name}") +async def get_file_docs(name): + return ctx.get_file_mgr().get_docs_by_file(name) + + +# DELETE a file +@data_app.delete(path="/v1/data/files/{name}") +async def delete_file(name): + if ctx.get_file_mgr().del_file(name): + # TODO: delete the nodes related to the file + all_docs = ctx.get_file_mgr().get_all_docs() + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return f"File {name} is deleted" + else: + return f"File {name} not found" + + +# UPDATE a file +@data_app.patch(path="/v1/data/files/{name}") +async def update_file(name, request: DataIn): + # 1. Delete + if ctx.get_file_mgr().del_file(name): + # 2. Add + docs = [] + if request.text is not None: + docs.extend(ctx.get_file_mgr().add_text(text=request.text)) + if request.local_path is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) + + # 3. Re-run the pipeline + # TODO: update the nodes related to the file + all_docs = ctx.get_file_mgr().get_all_docs() + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return f"File {name} is updated" + else: + return f"File {name} not found" diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/model.py b/EdgeCraftRAG/edgecraftrag/api/v1/model.py new file mode 100644 index 0000000000..17044ae91f --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/model.py @@ -0,0 +1,76 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import gc + +from edgecraftrag.api_schema import ModelIn +from edgecraftrag.context import ctx +from fastapi import FastAPI + +model_app = FastAPI() + + +# GET Models +@model_app.get(path="/v1/settings/models") +async def get_models(): + return ctx.get_model_mgr().get_models() + + +# GET Model +@model_app.get(path="/v1/settings/models/{model_id:path}") +async def get_model_by_name(model_id): + return ctx.get_model_mgr().get_model_by_name(model_id) + + +# POST Model +@model_app.post(path="/v1/settings/models") +async def add_model(request: ModelIn): + modelmgr = ctx.get_model_mgr() + # Currently uses asyncio.Lock() to serialize concurrent requests + async with modelmgr._lock: + model = modelmgr.search_model(request) + if model is None: + model = modelmgr.load_model(request) + modelmgr.add(model) + return model.model_id + " model loaded" + + +# PATCH Model +@model_app.patch(path="/v1/settings/models/{model_id:path}") +async def update_model(model_id, request: ModelIn): + # Patching a model is a two-step process: 1. delete the old model, 2. create the new one + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + modelmgr = ctx.get_model_mgr() + if active_pl and active_pl.model_existed(model_id): + return "Model is being used by the active pipeline, unable to update model" + else: + async with modelmgr._lock: + if modelmgr.get_model_by_name(model_id) is
None: + # Make sure the original model still exists before updating it, + # to prevent a memory leak under concurrent requests + return "Model " + model_id + " does not exist" + model = modelmgr.search_model(request) + if model is None: + modelmgr.del_model_by_name(model_id) + # Clean up memory occupation + gc.collect() + # load new model + model = modelmgr.load_model(request) + modelmgr.add(model) + return model + + +# DELETE Model +@model_app.delete(path="/v1/settings/models/{model_id:path}") +async def delete_model(model_id): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if active_pl and active_pl.model_existed(model_id): + return "Model is being used by the active pipeline, unable to remove" + else: + modelmgr = ctx.get_model_mgr() + # Currently uses asyncio.Lock() to serialize concurrent requests + async with modelmgr._lock: + response = modelmgr.del_model_by_name(model_id) + # Clean up memory occupation + gc.collect() + return response diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py new file mode 100644 index 0000000000..9d008e82f7 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -0,0 +1,180 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import weakref + +from edgecraftrag.api_schema import PipelineCreateIn +from edgecraftrag.base import IndexerType, InferenceType, ModelType, NodeParserType, PostProcessorType, RetrieverType +from edgecraftrag.components.generator import QnAGenerator +from edgecraftrag.components.indexer import VectorIndexer +from edgecraftrag.components.node_parser import HierarchyNodeParser, SimpleNodeParser, SWindowNodeParser +from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor +from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever +from edgecraftrag.context import ctx +from fastapi import FastAPI + +pipeline_app = FastAPI() + + +# GET Pipelines +@pipeline_app.get(path="/v1/settings/pipelines") +async def get_pipelines(): + return ctx.get_pipeline_mgr().get_pipelines() + + +# GET Pipeline +@pipeline_app.get(path="/v1/settings/pipelines/{name}") +async def get_pipeline(name): + return ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + + +# POST Pipeline +@pipeline_app.post(path="/v1/settings/pipelines") +async def add_pipeline(request: PipelineCreateIn): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(request.name) + if pl is None: + pl = ctx.get_pipeline_mgr().create_pipeline(request.name) + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if pl == active_pl: + if not request.active: + pass + else: + return "Unable to patch an active pipeline..." + update_pipeline_handler(pl, request) + return pl + + +# PATCH Pipeline +@pipeline_app.patch(path="/v1/settings/pipelines/{name}") +async def update_pipeline(name, request: PipelineCreateIn): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + if pl is None: + return None + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if pl == active_pl: + if not request.active: + pass + else: + return "Unable to patch an active pipeline..."
diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py
new file mode 100644
index 0000000000..9d008e82f7
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py
@@ -0,0 +1,180 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import weakref
+
+from edgecraftrag.api_schema import PipelineCreateIn
+from edgecraftrag.base import IndexerType, InferenceType, ModelType, NodeParserType, PostProcessorType, RetrieverType
+from edgecraftrag.components.generator import QnAGenerator
+from edgecraftrag.components.indexer import VectorIndexer
+from edgecraftrag.components.node_parser import HierarchyNodeParser, SimpleNodeParser, SWindowNodeParser
+from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor
+from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever
+from edgecraftrag.context import ctx
+from fastapi import FastAPI
+
+pipeline_app = FastAPI()
+
+
+# GET Pipelines
+@pipeline_app.get(path="/v1/settings/pipelines")
+async def get_pipelines():
+    return ctx.get_pipeline_mgr().get_pipelines()
+
+
+# GET Pipeline
+@pipeline_app.get(path="/v1/settings/pipelines/{name}")
+async def get_pipeline(name):
+    return ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name)
+
+
+# POST Pipeline
+@pipeline_app.post(path="/v1/settings/pipelines")
+async def add_pipeline(request: PipelineCreateIn):
+    pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(request.name)
+    if pl is None:
+        pl = ctx.get_pipeline_mgr().create_pipeline(request.name)
+    active_pl = ctx.get_pipeline_mgr().get_active_pipeline()
+    if pl == active_pl and request.active:
+        return "Unable to patch an active pipeline..."
+    update_pipeline_handler(pl, request)
+    return pl
+
+
+# PATCH Pipeline
+@pipeline_app.patch(path="/v1/settings/pipelines/{name}")
+async def update_pipeline(name, request: PipelineCreateIn):
+    pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name)
+    if pl is None:
+        return None
+    active_pl = ctx.get_pipeline_mgr().get_active_pipeline()
+    if pl == active_pl and request.active:
+        return "Unable to patch an active pipeline..."
+    async with ctx.get_pipeline_mgr()._lock:
+        update_pipeline_handler(pl, request)
+    return pl
+
+
+def update_pipeline_handler(pl, req):
+    if req.node_parser is not None:
+        np = req.node_parser
+        found_parser = ctx.get_node_parser_mgr().search_parser(np)
+        if found_parser is not None:
+            pl.node_parser = found_parser
+        else:
+            match np.parser_type:
+                case NodeParserType.SIMPLE:
+                    pl.node_parser = SimpleNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap)
+                case NodeParserType.HIERARCHY:
+                    """
+                    HierarchyNodeParser is for the Auto Merging Retriever
+                    (https://docs.llamaindex.ai/en/stable/examples/retrievers/auto_merging_retriever/).
+                    By default, the hierarchy is:
+                    1st level: chunk size 2048
+                    2nd level: chunk size 512
+                    3rd level: chunk size 128
+                    Set the chunk sizes with a list, e.g. chunk_sizes=[2048, 512, 128].
+                    """
+                    pl.node_parser = HierarchyNodeParser.from_defaults(
+                        chunk_sizes=np.chunk_sizes, chunk_overlap=np.chunk_overlap
+                    )
+                case NodeParserType.SENTENCEWINDOW:
+                    pl.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size)
+            ctx.get_node_parser_mgr().add(pl.node_parser)
+
+    if req.indexer is not None:
+        ind = req.indexer
+        found_indexer = ctx.get_indexer_mgr().search_indexer(ind)
+        if found_indexer is not None:
+            pl.indexer = found_indexer
+        else:
+            embed_model = None
+            if ind.embedding_model:
+                embed_model = ctx.get_model_mgr().search_model(ind.embedding_model)
+                if embed_model is None:
+                    ind.embedding_model.model_type = ModelType.EMBEDDING
+                    embed_model = ctx.get_model_mgr().load_model(ind.embedding_model)
+                    ctx.get_model_mgr().add(embed_model)
+            match ind.indexer_type:
+                case IndexerType.DEFAULT_VECTOR | IndexerType.FAISS_VECTOR:
+                    # TODO: RISK: what happens if two pipelines share the same
+                    # indexer but hold different nodes?
+                    pl.indexer = VectorIndexer(embed_model, ind.indexer_type)
+                case _:
+                    pass
+            ctx.get_indexer_mgr().add(pl.indexer)
+
+    if req.retriever is not None:
+        retr = req.retriever
+        match retr.retriever_type:
+            case RetrieverType.VECTORSIMILARITY:
+                if pl.indexer is not None:
+                    pl.retriever = VectorSimRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk)
+                else:
+                    return "No indexer"
+            case RetrieverType.AUTOMERGE:
+                # AutoMergeRetriever looks at a set of leaf nodes and recursively
+                # "merges" subsets of leaf nodes that reference a parent node
+                if pl.indexer is not None:
+                    pl.retriever = AutoMergeRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk)
+                else:
+                    return "No indexer"
+            case RetrieverType.BM25:
+                if pl.indexer is not None:
+                    pl.retriever = SimpleBM25Retriever(pl.indexer, similarity_top_k=retr.retrieve_topk)
+                else:
+                    return "No indexer"
+            case _:
+                pass
+
+    if req.postprocessor is not None:
+        pp = req.postprocessor
+        pl.postprocessor = []
+        for processor in pp:
+            match processor.processor_type:
+                case PostProcessorType.RERANKER:
+                    if processor.reranker_model:
+                        prm = processor.reranker_model
+                        reranker_model = ctx.get_model_mgr().search_model(prm)
+                        if reranker_model is None:
+                            prm.model_type = ModelType.RERANKER
+                            reranker_model = ctx.get_model_mgr().load_model(prm)
+                            ctx.get_model_mgr().add(reranker_model)
+                        postprocessor = RerankProcessor(reranker_model, processor.top_n)
+                        pl.postprocessor.append(postprocessor)
+                    else:
+                        return "No reranker model"
+                case PostProcessorType.METADATAREPLACE:
+                    postprocessor = MetadataReplaceProcessor(target_metadata_key="window")
+                    pl.postprocessor.append(postprocessor)
+
+    if req.generator:
+        gen = req.generator
+        if gen.model is None:
+            return "No ChatQnA Model"
+        if gen.inference_type == InferenceType.VLLM:
+            if gen.model.model_id:
+                model_ref = gen.model.model_id
+            else:
+                model_ref = gen.model.model_path
+            pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type)
+        elif gen.inference_type == InferenceType.LOCAL:
+            model = ctx.get_model_mgr().search_model(gen.model)
+            if model is None:
+                gen.model.model_type = ModelType.LLM
+                model = ctx.get_model_mgr().load_model(gen.model)
+                ctx.get_model_mgr().add(model)
+            # Use a weakref so that deleting the model elsewhere can actually release its memory
+            model_ref = weakref.ref(model)
+            pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type)
+        else:
+            return "Inference Type Not Supported"
+
+    if pl.status.active != req.active:
+        ctx.get_pipeline_mgr().activate_pipeline(pl.name, req.active, ctx.get_node_mgr())
+    return pl
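Creating and activating a pipeline is a single POST. A sketch reusing the test payload from tests/test_pipeline_local_llm.json (shown later in this patch), assuming it is run from the EdgeCraftRAG directory against the default port:

```python
import json

import requests

with open("tests/test_pipeline_local_llm.json") as f:
    payload = json.load(f)

r = requests.post("http://localhost:16010/v1/settings/pipelines", json=payload)
print(r.json())  # serialized pipeline; it becomes active because the payload sets "active"
```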
diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py
new file mode 100644
index 0000000000..1f124a7f9a
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/api_schema.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Optional
+
+from pydantic import BaseModel
+
+
+class ModelIn(BaseModel):
+    model_type: Optional[str] = "llm"
+    model_id: Optional[str] = None
+    model_path: Optional[str] = "./"
+    device: Optional[str] = "cpu"
+
+
+class NodeParserIn(BaseModel):
+    chunk_size: Optional[int] = None
+    chunk_overlap: Optional[int] = None
+    chunk_sizes: Optional[list] = None
+    parser_type: str
+    window_size: Optional[int] = None
+
+
+class IndexerIn(BaseModel):
+    indexer_type: str
+    embedding_model: Optional[ModelIn] = None
+
+
+class RetrieverIn(BaseModel):
+    retriever_type: str
+    retrieve_topk: Optional[int] = 3
+
+
+class PostProcessorIn(BaseModel):
+    processor_type: str
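These schemas compose, so a pipeline spec can be validated locally before it is POSTed. A small sketch, assuming pydantic v2 (for model_dump_json) and an illustrative embedding model id:

```python
from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn, PipelineCreateIn, RetrieverIn

spec = PipelineCreateIn(
    name="demo",
    node_parser=NodeParserIn(parser_type="simple", chunk_size=400, chunk_overlap=48),
    indexer=IndexerIn(indexer_type="faiss_vector", embedding_model=ModelIn(model_id="BAAI/bge-small-en-v1.5")),
    retriever=RetrieverIn(retriever_type="vectorsimilarity", retrieve_topk=30),
)
print(spec.model_dump_json(indent=2))  # ready to send to /v1/settings/pipelines
```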
reranker_model: Optional[ModelIn] = None
+    top_n: Optional[int] = 5
+
+
+class GeneratorIn(BaseModel):
+    prompt_path: Optional[str] = None
+    model: Optional[ModelIn] = None
+    inference_type: Optional[str] = "local"
+
+
+class PipelineCreateIn(BaseModel):
+    name: Optional[str] = None
+    node_parser: Optional[NodeParserIn] = None
+    indexer: Optional[IndexerIn] = None
+    retriever: Optional[RetrieverIn] = None
+    postprocessor: Optional[list[PostProcessorIn]] = None
+    generator: Optional[GeneratorIn] = None
+    active: Optional[bool] = False
+
+
+class DataIn(BaseModel):
+    text: Optional[str] = None
+    local_path: Optional[str] = None
+
+
+class FilesIn(BaseModel):
+    local_paths: Optional[list[str]] = None
diff --git a/EdgeCraftRAG/edgecraftrag/base.py b/EdgeCraftRAG/edgecraftrag/base.py
new file mode 100644
index 0000000000..d8c7aaef84
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/base.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import abc
+import uuid
+from enum import Enum
+from typing import Any, Optional
+
+from pydantic import BaseModel, ConfigDict, Field, model_serializer
+
+
+class CompType(str, Enum):
+
+    DEFAULT = "default"
+    MODEL = "model"
+    PIPELINE = "pipeline"
+    NODEPARSER = "node_parser"
+    INDEXER = "indexer"
+    RETRIEVER = "retriever"
+    POSTPROCESSOR = "postprocessor"
+    GENERATOR = "generator"
+    FILE = "file"
+
+
+class ModelType(str, Enum):
+
+    EMBEDDING = "embedding"
+    RERANKER = "reranker"
+    LLM = "llm"
+
+
+class FileType(str, Enum):
+    TEXT = "text"
+    VISUAL = "visual"
+    AURAL = "aural"
+    VIRTUAL = "virtual"
+    OTHER = "other"
+
+
+class NodeParserType(str, Enum):
+
+    DEFAULT = "default"
+    SIMPLE = "simple"
+    HIERARCHY = "hierarchical"
+    SENTENCEWINDOW = "sentencewindow"
+
+
+class IndexerType(str, Enum):
+
+    DEFAULT = "default"
+    FAISS_VECTOR = "faiss_vector"
+    DEFAULT_VECTOR = "vector"
+
+
+class RetrieverType(str, Enum):
+
+    DEFAULT = "default"
+    VECTORSIMILARITY = "vectorsimilarity"
+    AUTOMERGE = "auto_merge"
+    BM25 = "bm25"
+
+
+class PostProcessorType(str, Enum):
+
+    RERANKER = "reranker"
+    METADATAREPLACE = "metadata_replace"
+
+
+class GeneratorType(str, Enum):
+
+    CHATQNA = "chatqna"
+
+
+class InferenceType(str, Enum):
+
+    LOCAL = "local"
+    VLLM = "vllm"
+
+
+class CallbackType(str, Enum):
+
+    DATAPREP = "dataprep"
+    RETRIEVE = "retrieve"
+    PIPELINE = "pipeline"
+
+
+class BaseComponent(BaseModel):
+
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+
+    idx: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    name: Optional[str] = Field(default="")
+    comp_type: str = Field(default="")
+    comp_subtype: Optional[str] = Field(default="")
+
+    @model_serializer
+    def ser_model(self):
+        set = {
+            "idx": self.idx,
+            "name": self.name,
+            "comp_type": self.comp_type,
+            "comp_subtype": self.comp_subtype,
+        }
+        return set
+
+    @abc.abstractmethod
+    def run(self, **kwargs) -> Any:
+        pass
+
+
+class BaseMgr:
+
+    def __init__(self):
+        self.components = {}
+
+    def add(self, comp: BaseComponent):
+        self.components[comp.idx] = comp
+
+    def get(self, idx: str) -> BaseComponent:
+        if idx in self.components:
+            return self.components[idx]
+        else:
+            return None
+
+    def remove(self, idx):
+        # Drop our reference; once the reference count reaches 0,
+        # the object's memory can be freed by the garbage collector
+        del self.components[idx]
diff --git a/EdgeCraftRAG/edgecraftrag/components/__init__.py b/EdgeCraftRAG/edgecraftrag/components/__init__.py
new file mode 100644
index 0000000000..916f3a44b2
--- 
/dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/components/data.py b/EdgeCraftRAG/edgecraftrag/components/data.py new file mode 100644 index 0000000000..e7fa19e7ad --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/data.py @@ -0,0 +1,65 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from typing import Any, List, Optional + +from edgecraftrag.base import BaseComponent, CompType, FileType +from llama_index.core.schema import Document +from pydantic import BaseModel, Field, model_serializer + + +class File(BaseComponent): + file_path: str = Field(default="") + comp_subtype: str = Field(default="") + documents: List[Document] = Field(default=[]) + + def __init__(self, file_name: Optional[str] = None, file_path: Optional[str] = None, content: Optional[str] = None): + super().__init__(comp_type=CompType.FILE) + + if not file_name and not file_path: + raise ValueError("File name or path must be provided") + + _path = Path(file_path) if file_path else None + if file_name: + self.name = file_name + else: + self.name = _path.name + self.file_path = _path + self.comp_subtype = FileType.TEXT + if _path and _path.exists(): + self.documents.extend(convert_file_to_documents(_path)) + if content: + self.documents.extend(convert_text_to_documents(content)) + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = { + "file_name": self.name, + "file_id": self.idx, + "file_type": self.comp_subtype, + "file_path": str(self.file_path), + "docs_count": len(self.documents), + } + return set + + +def convert_text_to_documents(text) -> List[Document]: + return [Document(text=text, metadata={"file_name": "text"})] + + +def convert_file_to_documents(file_path) -> List[Document]: + from llama_index.core import SimpleDirectoryReader + + supported_exts = [".pdf", ".txt", ".doc", ".docx", ".pptx", ".ppt", ".csv", ".md", ".html", ".rst"] + if file_path.is_dir(): + docs = SimpleDirectoryReader(input_dir=file_path, recursive=True, required_exts=supported_exts).load_data() + elif file_path.is_file(): + docs = SimpleDirectoryReader(input_files=[file_path], required_exts=supported_exts).load_data() + else: + docs = [] + + return docs diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py new file mode 100644 index 0000000000..cbfd6686d0 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -0,0 +1,194 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import dataclasses +import os + +from comps import GeneratedDoc, opea_telemetry +from edgecraftrag.base import BaseComponent, CompType, GeneratorType +from fastapi.responses import StreamingResponse +from langchain_core.prompts import PromptTemplate +from llama_index.llms.openai_like import OpenAILike +from pydantic import model_serializer + + +@opea_telemetry +def post_process_text(text: str): + if text == " ": + return "data: @#$\n\n" + if text == "\n": + return "data:
\n\n" + if text.isspace(): + return None + new_text = text.replace(" ", "@#$") + return f"data: {new_text}\n\n" + + +class QnAGenerator(BaseComponent): + + def __init__(self, llm_model, prompt_template, inference_type, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.GENERATOR, + comp_subtype=GeneratorType.CHATQNA, + ) + self.inference_type = inference_type + self._REPLACE_PAIRS = ( + ("\n\n", "\n"), + ("\t\n", "\n"), + ) + template = prompt_template + self.prompt = ( + DocumentedContextRagPromptTemplate.from_file(template) + if os.path.isfile(template) + else DocumentedContextRagPromptTemplate.from_template(template) + ) + self.llm = llm_model + if isinstance(llm_model, str): + self.model_id = llm_model + else: + self.model_id = llm_model().model_id + + def clean_string(self, string): + ret = string + for p in self._REPLACE_PAIRS: + ret = ret.replace(*p) + return ret + + def run(self, chat_request, retrieved_nodes, **kwargs): + if self.llm() is None: + # This could happen when User delete all LLMs through RESTful API + return "No LLM available, please load LLM" + # query transformation + text_gen_context = "" + for n in retrieved_nodes: + origin_text = n.node.get_text() + text_gen_context += self.clean_string(origin_text.strip()) + + query = chat_request.messages + prompt_str = self.prompt.format(input=query, context=text_gen_context) + generate_kwargs = dict( + temperature=chat_request.temperature, + do_sample=chat_request.temperature > 0.0, + top_p=chat_request.top_p, + top_k=chat_request.top_k, + typical_p=chat_request.typical_p, + repetition_penalty=chat_request.repetition_penalty, + ) + self.llm().generate_kwargs = generate_kwargs + + return self.llm().complete(prompt_str) + + def run_vllm(self, chat_request, retrieved_nodes, **kwargs): + if self.llm is None: + return "No LLM provided, please provide model_id_or_path" + # query transformation + text_gen_context = "" + for n in retrieved_nodes: + origin_text = n.node.get_text() + text_gen_context += self.clean_string(origin_text.strip()) + + query = chat_request.messages + prompt_str = self.prompt.format(input=query, context=text_gen_context) + + llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") + model_name = self.llm + llm = OpenAILike( + api_key="fake", + api_base=llm_endpoint + "/v1", + max_tokens=chat_request.max_tokens, + model=model_name, + top_p=chat_request.top_p, + temperature=chat_request.temperature, + streaming=chat_request.stream, + ) + + if chat_request.stream: + + async def stream_generator(): + response = await llm.astream_complete(prompt_str) + async for text in response: + output = text.text + yield f"data: {output}\n\n" + + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + response = llm.complete(prompt_str) + response = response.text + + return GeneratedDoc(text=response, prompt=prompt_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "generator_type": self.comp_subtype, "model": self.model_id} + return set + + +@dataclasses.dataclass +class INSTRUCTIONS: + IM_START = "You are an AI assistant that helps users answer questions given a specific context." + SUCCINCT = "Ensure your response is succinct" + ACCURATE = "Ensure your response is accurate." + SUCCINCT_AND_ACCURATE = "Ensure your response is succinct. Try to be accurate if possible." + ACCURATE_AND_SUCCINCT = "Ensure your response is accurate. Try to be succinct if possible." 
+    NO_RAMBLING = "Avoid posing new questions or self-questioning and answering, and refrain from repeating words in your response."
+    SAY_SOMETHING = "Avoid meaningless answers such as random symbols or blanks."
+    ENCOURAGE = "If you cannot understand the question well, try to translate it into English, and translate the answer back into the language of the question."
+    NO_IDEA = (
+        'If the answer is not discernible, please respond with "Sorry. I have no idea" in the language of the question.'
+    )
+    CLOZE_TEST = """The task is a fill-in-the-blank/cloze test."""
+    NO_MEANINGLESS_SYMBOLS = "Meaningless symbols and ``` should not be included in your response."
+    ADAPT_NATIVE_LANGUAGE = "Please try to think like a person who speaks the same language that the question uses."
+
+
+def _is_cloze(question):
+    return ("()" in question or "（）" in question) and ("填" in question or "fill" in question or "cloze" in question)
+
+
+# deprecated
+def get_instructions(question):
+    # naive pre-retrieval rewrite
+    # cloze
+    if _is_cloze(question):
+        instructions = [
+            INSTRUCTIONS.CLOZE_TEST,
+        ]
+    else:
+        instructions = [
+            INSTRUCTIONS.ACCURATE_AND_SUCCINCT,
+            INSTRUCTIONS.NO_RAMBLING,
+            INSTRUCTIONS.NO_MEANINGLESS_SYMBOLS,
+        ]
+    return ["System: {}".format(_) for _ in instructions]
+
+
+def preprocess_question(question):
+    if _is_cloze(question):
+        question = question.replace(" ", "").replace("（", "(").replace("）", ")")
+        # .replace("()", " <|blank|> ")
+        ret = "User: Please finish the following fill-in-the-blank question marked by $$$ at the beginning and end. Make sure all the () are filled.\n$$$\n{}\n$$$\nAssistant: ".format(
+            question
+        )
+    else:
+        ret = "User: {}\nAssistant: 从上下文提供的信息中可以知道,".format(question)
+    return ret
+
+
+class DocumentedContextRagPromptTemplate(PromptTemplate):
+
+    def format(self, **kwargs) -> str:
+        # context = '\n'.join([clean_string(f"{_.page_content}".strip()) for i, _ in enumerate(kwargs["context"])])
+        context = kwargs["context"]
+        question = kwargs["input"]
+        preprocessed_question = preprocess_question(question)
+        if "instructions" in self.template:
+            instructions = get_instructions(question)
+            prompt_str = self.template.format(
+                context=context, instructions="\n".join(instructions), input=preprocessed_question
+            )
+        else:
+            prompt_str = self.template.format(context=context, input=preprocessed_question)
+        return prompt_str
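The prompt assembly can be checked in isolation. A sketch, assuming it runs from the EdgeCraftRAG directory so the default template path resolves; the context and question strings are made up:

```python
from edgecraftrag.components.generator import DocumentedContextRagPromptTemplate

prompt = DocumentedContextRagPromptTemplate.from_file("./edgecraftrag/prompt_template/default_prompt.txt")
# preprocess_question() wraps the raw question into a "User: ... Assistant: ..." turn
print(prompt.format(context="EdgeCraftRAG is an edge RAG framework.", input="What is EdgeCraftRAG?"))
```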
diff --git a/EdgeCraftRAG/edgecraftrag/components/indexer.py b/EdgeCraftRAG/edgecraftrag/components/indexer.py
new file mode 100644
index 0000000000..83346d4901
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/components/indexer.py
@@ -0,0 +1,45 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any
+
+import faiss
+from edgecraftrag.base import BaseComponent, CompType, IndexerType
+from llama_index.core import StorageContext, VectorStoreIndex
+from llama_index.vector_stores.faiss import FaissVectorStore
+from pydantic import model_serializer
+
+
+class VectorIndexer(BaseComponent, VectorStoreIndex):
+
+    def __init__(self, embed_model, vector_type):
+        BaseComponent.__init__(
+            self,
+            comp_type=CompType.INDEXER,
+            comp_subtype=vector_type,
+        )
+        self.model = embed_model
+        if not embed_model:
+            # Settings.embed_model must be set to None when embed_model is None to avoid a 'no OpenAI key' error
+            from llama_index.core import Settings
+
+            Settings.embed_model = None
+        match vector_type:
+            case IndexerType.DEFAULT_VECTOR:
+                VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[])
+            case IndexerType.FAISS_VECTOR:
+                if embed_model:
+                    d = embed_model._model.request.outputs[0].get_partial_shape()[2].get_length()
+                else:
+                    d = 128
+                faiss_index = faiss.IndexFlatL2(d)
+                faiss_store = StorageContext.from_defaults(vector_store=FaissVectorStore(faiss_index=faiss_index))
+                VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[], storage_context=faiss_store)
+
+    def run(self, **kwargs) -> Any:
+        pass
+
+    @model_serializer
+    def ser_model(self):
+        set = {"idx": self.idx, "indexer_type": self.comp_subtype, "model": self.model}
+        return set
diff --git a/EdgeCraftRAG/edgecraftrag/components/model.py b/EdgeCraftRAG/edgecraftrag/components/model.py
new file mode 100644
index 0000000000..72ee7f16e0
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/components/model.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any, Optional
+
+from edgecraftrag.base import BaseComponent, CompType, ModelType
+from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding
+from llama_index.llms.openvino import OpenVINOLLM
+from llama_index.postprocessor.openvino_rerank import OpenVINORerank
+from pydantic import Field, model_serializer
+
+
+class BaseModelComponent(BaseComponent):
+
+    model_id: Optional[str] = Field(default="")
+    model_path: Optional[str] = Field(default="")
+    device: Optional[str] = Field(default="cpu")
+
+    def run(self, **kwargs) -> Any:
+        pass
+
+    @model_serializer
+    def ser_model(self):
+        set = {
+            "idx": self.idx,
+            "type": self.comp_subtype,
+            "model_id": self.model_id,
+            "model_path": self.model_path,
+            "device": self.device,
+        }
+        return set
+
+
+class OpenVINOEmbeddingModel(BaseModelComponent, OpenVINOEmbedding):
+
+    def __init__(self, model_id, model_path, device):
+        OpenVINOEmbedding.create_and_save_openvino_model(model_id, model_path)
+        OpenVINOEmbedding.__init__(self, model_id_or_path=model_path, device=device)
+        self.comp_type = CompType.MODEL
+        self.comp_subtype = ModelType.EMBEDDING
+        self.model_id = model_id
+        self.model_path = model_path
+        self.device = device
+
+
+class OpenVINORerankModel(BaseModelComponent, OpenVINORerank):
+
+    def __init__(self, model_id, model_path, device):
+        OpenVINORerank.create_and_save_openvino_model(model_id, model_path)
+        OpenVINORerank.__init__(
+            self,
+            model_id_or_path=model_path,
+            device=device,
+        )
+        self.comp_type = CompType.MODEL
+        self.comp_subtype = ModelType.RERANKER
+        self.model_id = model_id
+        self.model_path = model_path
+        self.device = device
+
+
+class OpenVINOLLMModel(BaseModelComponent, OpenVINOLLM):
+
+    def __init__(self, model_id, model_path, device):
+        OpenVINOLLM.__init__(
+            self,
+            model_id_or_path=model_path,
+            device_map=device,
+        )
+        self.comp_type = CompType.MODEL
+        self.comp_subtype = ModelType.LLM
+        self.model_id = model_id
+        self.model_path = model_path
+        self.device = device
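The FAISS_VECTOR branch boils down to a few llama_index calls. A standalone sketch, assuming the fallback dimension d=128 that VectorIndexer uses when no embedding model is attached:

```python
import faiss
from llama_index.core import Settings, StorageContext, VectorStoreIndex
from llama_index.vector_stores.faiss import FaissVectorStore

Settings.embed_model = None  # same trick as VectorIndexer: avoid LlamaIndex's OpenAI default
d = 128                      # embedding dimension; probed from the OpenVINO model when one is present
storage = StorageContext.from_defaults(vector_store=FaissVectorStore(faiss_index=faiss.IndexFlatL2(d)))
index = VectorStoreIndex(nodes=[], storage_context=storage, embed_model=None)
```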
diff --git a/EdgeCraftRAG/edgecraftrag/components/node_parser.py b/EdgeCraftRAG/edgecraftrag/components/node_parser.py
new file mode 100644
index 0000000000..cd50f45347
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/components/node_parser.py
@@ -0,0 +1,85 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any
+
+from edgecraftrag.base import BaseComponent, CompType, NodeParserType
+from llama_index.core.node_parser import HierarchicalNodeParser, SentenceSplitter, SentenceWindowNodeParser
+from pydantic import model_serializer
+
+
+class SimpleNodeParser(BaseComponent, SentenceSplitter):
+
+    # Initialize via super() for SentenceSplitter, since its __init__ will
+    # clean up BaseComponent fields
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.comp_type = CompType.NODEPARSER
+        self.comp_subtype = NodeParserType.SIMPLE
+
+    def run(self, **kwargs) -> Any:
+        for k, v in kwargs.items():
+            if k == "docs":
+                return self.get_nodes_from_documents(v, show_progress=False)
+
+        return None
+
+    @model_serializer
+    def ser_model(self):
+        set = {
+            "idx": self.idx,
+            "parser_type": self.comp_subtype,
+            "chunk_size": self.chunk_size,
+            "chunk_overlap": self.chunk_overlap,
+        }
+        return set
+
+
+class HierarchyNodeParser(BaseComponent, HierarchicalNodeParser):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.comp_type = CompType.NODEPARSER
+        self.comp_subtype = NodeParserType.HIERARCHY
+
+    def run(self, **kwargs) -> Any:
+        for k, v in kwargs.items():
+            if k == "docs":
+                return self.get_nodes_from_documents(v, show_progress=False)
+
+        return None
+
+    @model_serializer
+    def ser_model(self):
+        set = {
+            "idx": self.idx,
+            "parser_type": self.comp_subtype,
+            "chunk_size": self.chunk_sizes,
+            "chunk_overlap": None,
+        }
+        return set
+
+
+class SWindowNodeParser(BaseComponent, SentenceWindowNodeParser):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.comp_type = CompType.NODEPARSER
+        self.comp_subtype = NodeParserType.SENTENCEWINDOW
+
+    def run(self, **kwargs) -> Any:
+        for k, v in kwargs.items():
+            if k == "docs":
+                return self.get_nodes_from_documents(v, show_progress=False)
+
+        return None
+
+    @model_serializer
+    def ser_model(self):
+        set = {
+            "idx": self.idx,
+            "parser_type": self.comp_subtype,
+            "chunk_size": None,
+            "chunk_overlap": None,
+        }
+        return set
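A quick sketch of the three parser flavors side by side, assuming llama_index Documents as input; the parameters mirror the ones the pipeline API accepts:

```python
from edgecraftrag.components.node_parser import HierarchyNodeParser, SimpleNodeParser, SWindowNodeParser
from llama_index.core import Document

docs = [Document(text="EdgeCraftRAG splits documents into nodes before indexing them.")]

parsers = [
    SimpleNodeParser(chunk_size=400, chunk_overlap=48),
    HierarchyNodeParser.from_defaults(chunk_sizes=[2048, 512, 128], chunk_overlap=20),
    SWindowNodeParser.from_defaults(window_size=3),
]
for parser in parsers:
    print(type(parser).__name__, len(parser.run(docs=docs)))
```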
diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py
new file mode 100644
index 0000000000..4a2932e00b
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py
@@ -0,0 +1,160 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any, Callable, List, Optional
+
+from comps.cores.proto.api_protocol import ChatCompletionRequest
+from edgecraftrag.base import BaseComponent, CallbackType, CompType, InferenceType
+from edgecraftrag.components.postprocessor import RerankProcessor
+from llama_index.core.schema import Document, QueryBundle
+from pydantic import BaseModel, Field, model_serializer
+
+
+class PipelineStatus(BaseModel):
+    active: bool = False
+
+
+class Pipeline(BaseComponent):
+
+    node_parser: Optional[BaseComponent] = Field(default=None)
+    indexer: Optional[BaseComponent] = Field(default=None)
+    retriever: Optional[BaseComponent] = Field(default=None)
+    postprocessor: Optional[List[BaseComponent]] = Field(default=None)
+    generator: Optional[BaseComponent] = Field(default=None)
+    status: PipelineStatus = Field(default=PipelineStatus())
+    run_pipeline_cb: Optional[Callable[..., Any]] = Field(default=None)
+    run_retriever_cb: Optional[Callable[..., Any]] = Field(default=None)
+    run_data_prepare_cb: Optional[Callable[..., Any]] = Field(default=None)
+
+    def __init__(
+        self,
+        name,
+    ):
+        super().__init__(name=name, comp_type=CompType.PIPELINE)
+        if self.name == "" or self.name is None:
+            self.name = self.idx
+        self.run_pipeline_cb = run_test_generator
+        self.run_retriever_cb = run_test_retrieve
+        self.run_data_prepare_cb = run_simple_doc
+        self._node_changed = True
+
+    # TODO: consider race condition
+    @property
+    def node_changed(self) -> bool:
+        return self._node_changed
+
+    # TODO: track document changes
+    # TODO: more operations needed: add, del, modify
+    def update_nodes(self, nodes):
+        print("updating nodes ", nodes)
+        if self.indexer is not None:
+            self.indexer.insert_nodes(nodes)
+
+    # TODO: check more conditions
+    def check_active(self, nodelist):
+        if self._node_changed and nodelist is not None:
+            self.update_nodes(nodelist)
+
+    # Implements the abstract run function as a callback dispatcher
+    def run(self, **kwargs) -> Any:
+        print(kwargs)
+        if "cbtype" in kwargs:
+            if kwargs["cbtype"] == CallbackType.DATAPREP:
+                if "docs" in kwargs:
+                    return self.run_data_prepare_cb(self, docs=kwargs["docs"])
+            if kwargs["cbtype"] == CallbackType.RETRIEVE:
+                if "chat_request" in kwargs:
+                    return self.run_retriever_cb(self, chat_request=kwargs["chat_request"])
+            if kwargs["cbtype"] == CallbackType.PIPELINE:
+                if "chat_request" in kwargs:
+                    return self.run_pipeline_cb(self, chat_request=kwargs["chat_request"])
+
+    def update(self, node_parser=None, indexer=None, retriever=None, postprocessor=None, generator=None):
+        if node_parser is not None:
+            self.node_parser = node_parser
+        if indexer is not None:
+            self.indexer = indexer
+        if retriever is not None:
+            self.retriever = retriever
+        if postprocessor is not None:
+            self.postprocessor = postprocessor
+        if generator is not None:
+            self.generator = generator
+
+    @model_serializer
+    def ser_model(self):
+        set = {
+            "idx": self.idx,
+            "name": self.name,
+            "comp_type": self.comp_type,
+            "node_parser": self.node_parser,
+            "indexer": self.indexer,
+            "retriever": self.retriever,
+            "postprocessor": self.postprocessor,
+            "generator": self.generator,
+            "status": self.status,
+        }
+        return set
+
+    def model_existed(self, model_id: str) -> bool:
+        # Check whether the given model_id is used anywhere in this pipeline
+        if self.indexer:
+            if hasattr(self.indexer, "_embed_model") and self.indexer._embed_model.model_id == model_id:
+                return True
+            if hasattr(self.indexer, "_llm") and self.indexer._llm.model_id == model_id:
+                return True
+        if self.postprocessor:
+            for processor in self.postprocessor:
+                if hasattr(processor, "model_id") and processor.model_id == model_id:
+                    return True
+        if self.generator:
+            llm = self.generator.llm
+            if llm() and llm().model_id == model_id:
+                return True
+        return False
+
+
+# Test callback to retrieve nodes for a query
+def run_test_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any:
+    query = chat_request.messages
+    retri_res = pl.retriever.run(query=query)
+    query_bundle = QueryBundle(query)
+    if pl.postprocessor:
+        for processor in pl.postprocessor:
+            if (
+                isinstance(processor, RerankProcessor)
+                and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default
+            ):
+                processor.top_n = chat_request.top_n
+            retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle)
+    return retri_res
+
+
+def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any:
+    n = pl.node_parser.run(docs=docs)
+    if pl.indexer is not None:
+        pl.indexer.insert_nodes(n)
+        print(pl.indexer._index_struct)
+    return n
+
+
+def run_test_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any:
+    query = chat_request.messages
+    retri_res = pl.retriever.run(query=query)
+    query_bundle = QueryBundle(query)
+    if pl.postprocessor:
+        for processor in pl.postprocessor:
+            if (
+                isinstance(processor, RerankProcessor)
+                and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default
+            ):
+                processor.top_n = chat_request.top_n
+            retri_res = processor.run(retri_res=retri_res, 
query_bundle=query_bundle) + if pl.generator is None: + return "No Generator Specified" + if pl.generator.inference_type == InferenceType.LOCAL: + answer = pl.generator.run(chat_request, retri_res) + elif pl.generator.inference_type == InferenceType.VLLM: + answer = pl.generator.run_vllm(chat_request, retri_res) + return answer diff --git a/EdgeCraftRAG/edgecraftrag/components/postprocessor.py b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py new file mode 100644 index 0000000000..672826bdbb --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py @@ -0,0 +1,64 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any + +from edgecraftrag.base import BaseComponent, CompType, PostProcessorType +from llama_index.core.postprocessor import MetadataReplacementPostProcessor +from pydantic import model_serializer + + +class RerankProcessor(BaseComponent): + + def __init__(self, rerank_model, top_n): + BaseComponent.__init__( + self, + comp_type=CompType.POSTPROCESSOR, + comp_subtype=PostProcessorType.RERANKER, + ) + self.model = rerank_model + self.top_n = top_n + + def run(self, **kwargs) -> Any: + self.model.top_n = self.top_n + query_bundle = None + query_str = None + if "retri_res" in kwargs: + nodes = kwargs["retri_res"] + if "query_bundle" in kwargs: + query_bundle = kwargs["query_bundle"] + if "query_str" in kwargs: + query_str = kwargs["query_str"] + return self.model.postprocess_nodes(nodes, query_bundle=query_bundle, query_str=query_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "postprocessor_type": self.comp_subtype, "model": self.model, "top_n": self.top_n} + return set + + +class MetadataReplaceProcessor(BaseComponent, MetadataReplacementPostProcessor): + + def __init__(self, target_metadata_key="window"): + BaseComponent.__init__( + self, + target_metadata_key=target_metadata_key, + comp_type=CompType.POSTPROCESSOR, + comp_subtype=PostProcessorType.METADATAREPLACE, + ) + + def run(self, **kwargs) -> Any: + query_bundle = None + query_str = None + if "retri_res" in kwargs: + nodes = kwargs["retri_res"] + if "query_bundle" in kwargs: + query_bundle = kwargs["query_bundle"] + if "query_str" in kwargs: + query_str = kwargs["query_str"] + return self.postprocess_nodes(nodes, query_bundle=query_bundle, query_str=query_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "postprocessor_type": self.comp_subtype, "model": None, "top_n": None} + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/retriever.py b/EdgeCraftRAG/edgecraftrag/components/retriever.py new file mode 100644 index 0000000000..cba251b2a8 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/retriever.py @@ -0,0 +1,104 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, List, cast + +from edgecraftrag.base import BaseComponent, CompType, RetrieverType +from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever +from llama_index.core.retrievers import AutoMergingRetriever +from llama_index.core.schema import BaseNode +from llama_index.retrievers.bm25 import BM25Retriever +from pydantic import model_serializer + + +class VectorSimRetriever(BaseComponent, VectorIndexRetriever): + + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.VECTORSIMILARITY, + ) + VectorIndexRetriever.__init__( + self, + index=indexer, + 
node_ids=list(indexer.index_struct.nodes_dict.values()),
+            callback_manager=indexer._callback_manager,
+            object_map=indexer._object_map,
+            **kwargs,
+        )
+        # This might be a bug in the llama_index retriever: node_ids is never
+        # updated after the retriever's creation, yet it decides which node ids
+        # are available for retrieval, so the retrievable nodes would be frozen
+        # at creation time. Clear it so all nodes stay retrievable.
+        self._node_ids = None
+
+    def run(self, **kwargs) -> Any:
+        for k, v in kwargs.items():
+            if k == "query":
+                return self.retrieve(v)
+
+        return None
+
+    @model_serializer
+    def ser_model(self):
+        set = {
+            "idx": self.idx,
+            "retriever_type": self.comp_subtype,
+            "retrieve_topk": self.similarity_top_k,
+        }
+        return set
+
+
+class AutoMergeRetriever(BaseComponent, AutoMergingRetriever):
+
+    def __init__(self, indexer, **kwargs):
+        BaseComponent.__init__(
+            self,
+            comp_type=CompType.RETRIEVER,
+            comp_subtype=RetrieverType.AUTOMERGE,
+        )
+        self._index = indexer
+        self.topk = kwargs["similarity_top_k"]
+
+        AutoMergingRetriever.__init__(
+            self,
+            vector_retriever=indexer.as_retriever(**kwargs),
+            storage_context=indexer._storage_context,
+            object_map=indexer._object_map,
+            callback_manager=indexer._callback_manager,
+        )
+
+    def run(self, **kwargs) -> Any:
+        for k, v in kwargs.items():
+            if k == "query":
+                # vector_retriever needs to be updated
+                self._vector_retriever = self._index.as_retriever(similarity_top_k=self.topk)
+                return self.retrieve(v)
+
+        return None
+
+
+class SimpleBM25Retriever(BaseComponent):
+    # The nodes fed to BM25Retriever do not come from the index and cannot be
+    # updated through 'indexer.insert_nodes()', so they are passed to
+    # BM25Retriever after the data preparation stage, not at init stage
+
+    def __init__(self, indexer, **kwargs):
+        BaseComponent.__init__(
+            self,
+            comp_type=CompType.RETRIEVER,
+            comp_subtype=RetrieverType.BM25,
+        )
+        self._docstore = indexer._docstore
+        self.topk = kwargs["similarity_top_k"]
+
+    def run(self, **kwargs) -> Any:
+        for k, v in kwargs.items():
+            if k == "query":
+                nodes = cast(List[BaseNode], list(self._docstore.docs.values()))
+                bm25_retr = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=self.topk)
+                return bm25_retr.retrieve(v)
+
+        return None
diff --git a/EdgeCraftRAG/edgecraftrag/context.py b/EdgeCraftRAG/edgecraftrag/context.py
new file mode 100644
index 0000000000..3555ce4beb
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/context.py
@@ -0,0 +1,52 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from edgecraftrag.controllers.compmgr import GeneratorMgr, IndexerMgr, NodeParserMgr, PostProcessorMgr, RetrieverMgr
+from edgecraftrag.controllers.filemgr import FileMgr
+from edgecraftrag.controllers.modelmgr import ModelMgr
+from edgecraftrag.controllers.nodemgr import NodeMgr
+from edgecraftrag.controllers.pipelinemgr import PipelineMgr
+
+
+class Context:
+
+    def __init__(self):
+        self.plmgr = PipelineMgr()
+        self.nodemgr = NodeMgr()
+        self.npmgr = NodeParserMgr()
+        self.idxmgr = IndexerMgr()
+        self.rtvmgr = RetrieverMgr()
+        self.ppmgr = PostProcessorMgr()
+        self.modmgr = ModelMgr()
+        self.genmgr = GeneratorMgr()
+        self.filemgr = FileMgr()
+
+    def get_pipeline_mgr(self):
+        return self.plmgr
+
+    def get_node_mgr(self):
+        return self.nodemgr
+
+    def get_node_parser_mgr(self):
+        return self.npmgr
+
+    def get_indexer_mgr(self):
+        return self.idxmgr
+
+    def get_retriever_mgr(self):
+        return self.rtvmgr
+
+    def get_postprocessor_mgr(self):
+        return self.ppmgr
+
+    def get_model_mgr(self):
+        return self.modmgr
+
+    def get_generator_mgr(self):
+        return self.genmgr
+
+    def get_file_mgr(self):
+        return self.filemgr
+
+
+ctx = Context()
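The managers can also be driven directly through the shared context, bypassing REST. A minimal sketch:

```python
from edgecraftrag.context import ctx

plmgr = ctx.get_pipeline_mgr()
pl = plmgr.create_pipeline("demo")      # registered, but not active yet
print(plmgr.get_active_pipeline())      # None until activate_pipeline() is called
print(ctx.get_file_mgr().get_files())   # [] before any ingestion
```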
diff --git a/EdgeCraftRAG/edgecraftrag/controllers/__init__.py b/EdgeCraftRAG/edgecraftrag/controllers/__init__.py
new file mode 100644
index 0000000000..916f3a44b2
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/controllers/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py
new file mode 100644
index 0000000000..b8dd82ab7b
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py
@@ -0,0 +1,66 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from edgecraftrag.api_schema import IndexerIn, NodeParserIn
+from edgecraftrag.base import BaseComponent, BaseMgr, NodeParserType
+
+
+class NodeParserMgr(BaseMgr):
+
+    def __init__(self):
+        super().__init__()
+
+    def search_parser(self, npin: NodeParserIn) -> BaseComponent:
+        for _, v in self.components.items():
+            v_parser_type = v.comp_subtype
+            if v_parser_type == npin.parser_type:
+                if v_parser_type == NodeParserType.HIERARCHY and v.chunk_sizes == npin.chunk_sizes:
+                    return v
+                elif v_parser_type == NodeParserType.SENTENCEWINDOW and v.window_size == npin.window_size:
+                    return v
+                elif (
+                    v_parser_type == NodeParserType.SIMPLE
+                    and v.chunk_size == npin.chunk_size
+                    and v.chunk_overlap == npin.chunk_overlap
+                ):
+                    return v
+        return None
+
+
+class IndexerMgr(BaseMgr):
+
+    def __init__(self):
+        super().__init__()
+
+    def search_indexer(self, indin: IndexerIn) -> BaseComponent:
+        for _, v in self.components.items():
+            if v.comp_subtype == indin.indexer_type:
+                if (
+                    hasattr(v, "model")
+                    and v.model
+                    and indin.embedding_model
+                    and (
+                        (v.model.model_id_or_path == indin.embedding_model.model_id)
+                        or (v.model.model_id_or_path == indin.embedding_model.model_path)
+                    )
+                ):
+                    return v
+        return None
+
+
+class RetrieverMgr(BaseMgr):
+
+    def __init__(self):
+        super().__init__()
+
+
+class PostProcessorMgr(BaseMgr):
+
+    def __init__(self):
+        super().__init__()
+
+
+class GeneratorMgr(BaseMgr):
+
+    def __init__(self):
+        super().__init__()
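NodeParserMgr deduplicates parser components: specs are searched before anything new is created, which is exactly what update_pipeline_handler relies on. A sketch:

```python
from edgecraftrag.api_schema import NodeParserIn
from edgecraftrag.components.node_parser import SimpleNodeParser
from edgecraftrag.controllers.compmgr import NodeParserMgr

npmgr = NodeParserMgr()
spec = NodeParserIn(parser_type="simple", chunk_size=400, chunk_overlap=48)
if npmgr.search_parser(spec) is None:
    npmgr.add(SimpleNodeParser(chunk_size=spec.chunk_size, chunk_overlap=spec.chunk_overlap))

# an identical spec now resolves to the already-registered component
assert npmgr.search_parser(spec) is not None
```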
diff --git a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py
new file mode 100644
index 0000000000..0278f1f6ac
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py
@@ -0,0 +1,83 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+from typing import Any, List
+
+from edgecraftrag.base import BaseMgr
+from edgecraftrag.components.data import File
+from llama_index.core.schema import Document
+
+
+class FileMgr(BaseMgr):
+
+    def __init__(self):
+        super().__init__()
+
+    def add_text(self, text: str):
+        file = File(file_name="text", content=text)
+        self.add(file)
+        return file.documents
+
+    def add_files(self, docs: Any):
+        if not isinstance(docs, list):
+            docs = [docs]
+
+        input_docs = []
+        for doc in docs:
+            if not os.path.exists(doc):
+                continue
+
+            if os.path.isfile(doc):
+                files = [doc]
+            elif os.path.isdir(doc):
+                files = [os.path.join(root, f) for root, _, files in os.walk(doc) for f in files]
+            else:
+                continue
+
+            if not files:
+                continue
+
+            for file_path in files:
+                file = File(file_path=file_path)
+                self.add(file)
+                input_docs.extend(file.documents)
+
+        return input_docs
+
+    def get_file_by_name_or_id(self, name: str):
+        for _, file in self.components.items():
+            if file.name == name or file.idx == name:
+                return file
+        return None
+
+    def get_files(self):
+        return [file for _, file in self.components.items()]
+
+    def get_all_docs(self) -> List[Document]:
+        all_docs = []
+        for _, file in self.components.items():
+            all_docs.extend(file.documents)
+        return all_docs
+
+    def get_docs_by_file(self, name) -> List[Document]:
+        file = self.get_file_by_name_or_id(name)
+        return file.documents if file else []
+
+    def del_file(self, name):
+        file = self.get_file_by_name_or_id(name)
+        if file:
+            self.remove(file.idx)
+            return True
+        else:
+            return False
+
+    def update_file(self, name):
+        file = self.get_file_by_name_or_id(name)
+        if file:
+            self.remove(file.idx)
+            self.add_files(docs=name)
+            return True
+        else:
+            return False
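A short sketch of the file manager flow, assuming a local ./docs directory containing at least one supported file type (.pdf, .txt, .md, ...):

```python
from edgecraftrag.controllers.filemgr import FileMgr

fmgr = FileMgr()
docs = fmgr.add_files(docs="./docs")  # walks the directory and loads Documents
docs += fmgr.add_text(text="Inline text is wrapped in a virtual 'text' file.")
print(len(fmgr.get_all_docs()), "documents across", len(fmgr.get_files()), "files")
```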
diff --git a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py
new file mode 100644
index 0000000000..73a77e48a8
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py
@@ -0,0 +1,94 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import asyncio
+
+from edgecraftrag.api_schema import ModelIn
+from edgecraftrag.base import BaseComponent, BaseMgr, ModelType
+from edgecraftrag.components.model import OpenVINOEmbeddingModel, OpenVINOLLMModel, OpenVINORerankModel
+
+
+class ModelMgr(BaseMgr):
+
+    def __init__(self):
+        self._lock = asyncio.Lock()
+        super().__init__()
+
+    def get_model_by_name(self, name: str):
+        for _, v in self.components.items():
+            if v.model_id == name:
+                model_type = v.comp_subtype.value
+                model_info = {
+                    "model_type": model_type,
+                    "model_id": getattr(v, "model_id", "Unknown"),
+                }
+                if model_type == ModelType.LLM:
+                    model_info["model_path"] = getattr(v, "model_name", "Unknown")
+                    model_info["device"] = getattr(v, "device_map", "Unknown")
+                else:
+                    model_info["model_path"] = getattr(v, "model_id_or_path", "Unknown")
+                    model_info["device"] = getattr(v, "device", getattr(v, "_device", "Unknown"))
+                return model_info
+        return None
+
+    def get_models(self):
+        model = {}
+        for k, v in self.components.items():
+            # Collect the model's metadata for the response
+            model_type = v.comp_subtype.value
+            model_info = {
+                "model_type": model_type,
+                "model_id": getattr(v, "model_id", "Unknown"),
+            }
+            if model_type == ModelType.LLM:
+                model_info["model_path"] = getattr(v, "model_name", "Unknown")
+                model_info["device"] = getattr(v, "device_map", "Unknown")
+            else:
+                model_info["model_path"] = getattr(v, "model_id_or_path", "Unknown")
+                model_info["device"] = getattr(v, "device", getattr(v, "_device", "Unknown"))
+            model[k] = model_info
+        return model
+
+    def search_model(self, modelin: ModelIn) -> BaseComponent:
+        # Search for a model by comparing model_path and device
+        for _, v in self.components.items():
+            model_path = v.model_name if v.comp_subtype.value == "llm" else v.model_id_or_path
+            model_dev = (
+                v.device_map
+                if v.comp_subtype.value == "llm"
+                else getattr(v, "device", getattr(v, "_device", "Unknown"))
+            )
+            if model_path == modelin.model_path and model_dev == modelin.device:
+                return v
+        return None
+
+    def del_model_by_name(self, name: str):
+        for key, v in self.components.items():
+            if v and v.model_id == name:
+                self.remove(key)
+                return "Model deleted"
+        return "Model not found"
+
+    @staticmethod
+    def load_model(model_para: ModelIn):
+        model = None
+        match model_para.model_type:
+            case ModelType.EMBEDDING:
+                model = OpenVINOEmbeddingModel(
+                    model_id=model_para.model_id,
+                    model_path=model_para.model_path,
+                    device=model_para.device,
+                )
+            case ModelType.RERANKER:
+                model = OpenVINORerankModel(
+                    model_id=model_para.model_id,
+                    model_path=model_para.model_path,
+                    device=model_para.device,
+                )
+            case ModelType.LLM:
+                model = OpenVINOLLMModel(
+                    model_id=model_para.model_id,
+                    model_path=model_para.model_path,
+                    device=model_para.device,
+                )
+        return model
diff --git a/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py
new file mode 100644
index 0000000000..13a41117c7
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import List
+
+from llama_index.core.schema import BaseNode
+
+
+class NodeMgr:
+
+    def __init__(self):
+        self.nodes = {}
+
+    # np_idx: idx of the node_parser that produced the nodes
+    def add_nodes(self, np_idx, nodes):
+        if np_idx in self.nodes:
+            self.nodes[np_idx].extend(nodes)
+        else:
+            self.nodes[np_idx] = nodes
+
+    # TODO: to be implemented
+    def del_nodes(self, nodes):
+        pass
+
+    def del_nodes_by_np_idx(self, np_idx):
+        del self.nodes[np_idx]
+
+    def get_nodes(self, np_idx) -> List[BaseNode]:
+        if np_idx in self.nodes:
+            return self.nodes[np_idx]
+        else:
+            return []
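NodeMgr groups parsed nodes per node_parser idx, and a follow-up batch has to extend the stored list so that get_nodes() keeps returning a flat List[BaseNode]. A tiny illustration:

```python
from edgecraftrag.controllers.nodemgr import NodeMgr
from llama_index.core.schema import TextNode

nm = NodeMgr()
nm.add_nodes("parser-1", [TextNode(text="chunk 1"), TextNode(text="chunk 2")])
nm.add_nodes("parser-1", [TextNode(text="chunk 3")])
assert len(nm.get_nodes("parser-1")) == 3
```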
diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py
new file mode 100644
index 0000000000..d0b8e07803
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py
@@ -0,0 +1,79 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import asyncio
+from typing import Any, List
+
+from comps.cores.proto.api_protocol import ChatCompletionRequest
+from edgecraftrag.base import BaseMgr, CallbackType
+from edgecraftrag.components.pipeline import Pipeline
+from edgecraftrag.controllers.nodemgr import NodeMgr
+from llama_index.core.schema import Document
+
+
+class PipelineMgr(BaseMgr):
+
+    def __init__(self):
+        self._active_pipeline = None
+        self._lock = asyncio.Lock()
+        super().__init__()
+
+    def create_pipeline(self, name: str):
+        pl = Pipeline(name)
+        self.add(pl)
+        return pl
+
+    def get_pipeline_by_name_or_id(self, name: str):
+        for _, pl in self.components.items():
+            if pl.name == name or pl.idx == name:
+                return pl
+        return None
+
+    def get_pipelines(self):
+        return [pl for _, pl in self.components.items()]
+
+    def activate_pipeline(self, name: str, active: bool, nm: NodeMgr):
+        pl = self.get_pipeline_by_name_or_id(name)
+        nodelist = None
+        if pl is not None:
+            if not active:
+                pl.status.active = False
+                self._active_pipeline = None
+                return
+            if pl.node_changed:
+                nodelist = nm.get_nodes(pl.node_parser.idx)
+            pl.check_active(nodelist)
+            prevactive = self._active_pipeline
+            if prevactive:
+                prevactive.status.active = False
+            pl.status.active = True
+            self._active_pipeline = pl
+
+    def get_active_pipeline(self) -> Pipeline:
+        return self._active_pipeline
+
+    def notify_node_change(self):
+        for _, pl in self.components.items():
+            pl.set_node_change()
+
+    def run_pipeline(self, chat_request: ChatCompletionRequest) -> Any:
+        ap = self.get_active_pipeline()
+        if ap is not None:
+            return ap.run(cbtype=CallbackType.PIPELINE, chat_request=chat_request)
+        return -1
+
+    def run_retrieve(self, chat_request: ChatCompletionRequest) -> Any:
+        ap = self.get_active_pipeline()
+        if ap is not None:
+            return ap.run(cbtype=CallbackType.RETRIEVE, chat_request=chat_request)
+        return -1
+
+    def run_data_prepare(self, docs: List[Document]) -> Any:
+        ap = self.get_active_pipeline()
+        if ap is not None:
+            return ap.run(cbtype=CallbackType.DATAPREP, docs=docs)
+        return -1
diff --git a/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt
new file mode 100644
index 0000000000..800d1fa2f2
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt
@@ -0,0 +1,8 @@
+<|im_start|>System: You are an AI assistant. Your task is to learn from the following context. Then answer the user's question based on what you learned from the context but not your own knowledge.<|im_end|>
+
+<|im_start|>{context}<|im_end|>
+
+<|im_start|>System: Pay attention to your formatting of response. If you need to reference content from context, try to keep the formatting.<|im_end|>
+<|im_start|>System: Try to summarize from the context, do some reasoning before response, then response. Make sure your response is logically sound and self-consistent.<|im_end|>
+
+<|im_start|>{input}
diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt
new file mode 100644
index 0000000000..3756c732ad
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/requirements.txt
@@ -0,0 +1,16 @@
+docx2txt
+faiss-cpu>=1.8.0.post1
+gradio>=4.44.1
+langchain-core==0.2.29
+llama-index>=0.11.0
+llama-index-embeddings-openvino>=0.4.0
+llama-index-llms-openai-like>=0.2.0
+llama-index-llms-openvino>=0.3.1
+llama-index-postprocessor-openvino-rerank>=0.3.0
+llama-index-retrievers-bm25>=0.3.0
+llama-index-vector-stores-faiss>=0.2.1
+loguru>=0.7.2
+omegaconf>=2.3.0
+opea-comps>=0.9
+py-cpuinfo>=9.0.0
+uvicorn>=0.30.6
diff --git a/EdgeCraftRAG/edgecraftrag/server.py b/EdgeCraftRAG/edgecraftrag/server.py
new file mode 100644
index 0000000000..705c3f07ba
--- /dev/null
+++ b/EdgeCraftRAG/edgecraftrag/server.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+import uvicorn
+from edgecraftrag.api.v1.chatqna import chatqna_app
+from edgecraftrag.api.v1.data import data_app
+from edgecraftrag.api.v1.model import model_app
+from edgecraftrag.api.v1.pipeline import pipeline_app
+from fastapi import FastAPI
+from llama_index.core.settings import Settings
+
+app = FastAPI()
+
+sub_apps = [data_app, model_app, pipeline_app, chatqna_app]
+for sub_app in sub_apps:
+    for route in sub_app.routes:
+        app.router.routes.append(route)
+
+
+if __name__ == "__main__":
+    Settings.llm = None
+
+    host = os.getenv("PIPELINE_SERVICE_HOST_IP", "0.0.0.0")
+    port = int(os.getenv("PIPELINE_SERVICE_PORT", 16010))
+    uvicorn.run(app, host=host, port=port)
diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json
new file mode 100644
index 0000000000..18895d6e50
--- /dev/null
+++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json
@@ -0,0 +1,41 @@
+{
+    "name": "rag_test_local_llm",
+    "node_parser": {
+        "chunk_size": 400,
+        "chunk_overlap": 48,
+        "parser_type": "simple"
+    },
+    "indexer": {
+        "indexer_type": "faiss_vector",
+        "embedding_model": {
+            "model_id": "BAAI/bge-small-en-v1.5",
+            "model_path": 
"./models/bge_ov_embedding", + "device": "auto" + } + }, + "retriever": { + "retriever_type": "vectorsimilarity", + "retrieve_topk": 30 + }, + "postprocessor": [ + { + "processor_type": "reranker", + "top_n": 2, + "reranker_model": { + "model_id": "BAAI/bge-reranker-large", + "model_path": "./models/bge_ov_reranker", + "device": "auto" + } + } + ], + "generator": { + "model": { + "model_id": "Qwen/Qwen2-7B-Instruct", + "model_path": "./models/qwen2-7b-instruct/INT4_compressed_weights", + "device": "cpu" + }, + "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt", + "inference_type": "local" + }, + "active": "True" +} diff --git a/EdgeCraftRAG/ui/docker/Dockerfile.ui b/EdgeCraftRAG/ui/docker/Dockerfile.ui new file mode 100644 index 0000000000..46a14a6e94 --- /dev/null +++ b/EdgeCraftRAG/ui/docker/Dockerfile.ui @@ -0,0 +1,23 @@ +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./ui/gradio /home/user/ui +COPY ./edgecraftrag /home/user/edgecraftrag + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user/ui + +USER user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "ecragui.py"] diff --git a/EdgeCraftRAG/ui/gradio/__init__.py b/EdgeCraftRAG/ui/gradio/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png new file mode 100644 index 0000000000..527b9ad94c Binary files /dev/null and b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png differ diff --git a/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png new file mode 100644 index 0000000000..707ddd251b Binary files /dev/null and b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png differ diff --git a/EdgeCraftRAG/ui/gradio/config.py b/EdgeCraftRAG/ui/gradio/config.py new file mode 100644 index 0000000000..477aba7c24 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/config.py @@ -0,0 +1,358 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +DEFAULT_SYSTEM_PROMPT = """\ +You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. +If a question does not make any sense or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\ +""" + +DEFAULT_SYSTEM_PROMPT_CHINESE = """\ +你是一个乐于助人、尊重他人以及诚实可靠的助手。在安全的情况下,始终尽可能有帮助地回答。 您的回答不应包含任何有害、不道德、种族主义、性别歧视、有毒、危险或非法的内容。请确保您的回答在社会上是公正的和积极的。 +如果一个问题没有任何意义或与事实不符,请解释原因,而不是回答错误的问题。如果您不知道问题的答案,请不要分享虚假信息。另外,答案请使用中文。\ +""" + +DEFAULT_SYSTEM_PROMPT_JAPANESE = """\ +あなたは親切で、礼儀正しく、誠実なアシスタントです。 常に安全を保ちながら、できるだけ役立つように答えてください。 回答には、有害、非倫理的、人種差別的、性差別的、有毒、危険、または違法なコンテンツを含めてはいけません。 回答は社会的に偏見がなく、本質的に前向きなものであることを確認してください。 +質問が意味をなさない場合、または事実に一貫性がない場合は、正しくないことに答えるのではなく、その理由を説明してください。 質問の答えがわからない場合は、誤った情報を共有しないでください。\ +""" + +DEFAULT_RAG_PROMPT = """\ +You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\ +""" + +DEFAULT_RAG_PROMPT_CHINESE = """\ +基于以下已知信息,请简洁并专业地回答用户的问题。如果无法从中得到答案,请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"。不允许在答案中添加编造成分。另外,答案请使用中文。\ +""" + + +def red_pijama_partial_text_processor(partial_text, new_text): + if new_text == "<": + return partial_text + + partial_text += new_text + return partial_text.split(":")[-1] + + +def llama_partial_text_processor(partial_text, new_text): + new_text = new_text.replace("[INST]", "").replace("[/INST]", "") + partial_text += new_text + return partial_text + + +def chatglm_partial_text_processor(partial_text, new_text): + new_text = new_text.strip() + new_text = new_text.replace("[[训练时间]]", "2023年") + partial_text += new_text + return partial_text + + +def youri_partial_text_processor(partial_text, new_text): + new_text = new_text.replace("システム:", "") + partial_text += new_text + return partial_text + + +def internlm_partial_text_processor(partial_text, new_text): + partial_text += new_text + return partial_text.split("<|im_end|>")[0] + + +SUPPORTED_LLM_MODELS = { + "English": { + "tiny-llama-1b-chat": { + "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "gemma-2b-it": { + "model_id": "google/gemma-2b-it", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT + ", ", + "history_template": "user{user}model{assistant}", + "current_message_template": "user{user}model{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """user{input}context{context}model""", + }, + "red-pajama-3b-chat": { + "model_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "remote_code": False, + "start_message": "", + "history_template": "\n:{user}\n:{assistant}", + "stop_tokens": [29, 0], + "partial_text_processor": red_pijama_partial_text_processor, + "current_message_template": "\n:{user}\n:{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT }""" + + """ + : Question: {input} + Context: {context} + Answer: """, + }, + "gemma-7b-it": { + "model_id": "google/gemma-7b-it", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT + ", ", + "history_template": "user{user}model{assistant}", + "current_message_template": "user{user}model{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """user{input}context{context}model""", + }, + "llama-2-chat-7b": { + 
"model_id": "meta-llama/Llama-2-7b-chat-hf", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f"""[INST]Human: <> {DEFAULT_RAG_PROMPT }<>""" + + """ + Question: {input} + Context: {context} + Answer: [/INST]""", + }, + "mpt-7b-chat": { + "model_id": "mosaicml/mpt-7b-chat", + "remote_code": False, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT }<|im_end|>""" + + """ + <|im_start|>user + Question: {input} + Context: {context} + Answer: <|im_start|>assistant""", + }, + "mistral-7b": { + "model_id": "mistralai/Mistral-7B-v0.1", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f""" [INST] {DEFAULT_RAG_PROMPT } [/INST] """ + + """ + [INST] Question: {input} + Context: {context} + Answer: [/INST]""", + }, + "zephyr-7b-beta": { + "model_id": "HuggingFaceH4/zephyr-7b-beta", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "notus-7b-v1": { + "model_id": "argilla/notus-7b-v1", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "neural-chat-7b-v3-1": { + "model_id": "Intel/neural-chat-7b-v3-3", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f""" [INST] {DEFAULT_RAG_PROMPT } [/INST] """ + + """ + [INST] Question: {input} + Context: {context} + Answer: [/INST]""", + }, + }, + "Chinese": { + "qwen1.5-0.5b-chat": { + "model_id": "Qwen/Qwen1.5-0.5B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + }, + "qwen1.5-7b-chat": { + "model_id": "Qwen/Qwen1.5-7B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "summarization_prompt_template": """ + <|im_start|>user + 
问题: 总结下文内容,不少于{character_num}字. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "split_summary_template": """ + <|im_start|>user + 问题: 根据已知内容写一篇简短的摘要. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "combine_summary_template": """ + <|im_start|>user + 问题: 根据已知内容写一篇摘要,不少于{character_num}字. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "qwen-7b-chat": { + "model_id": "Qwen/Qwen-7B-Chat", + "remote_code": True, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "qwen2-7b-instruct": { + "model_id": "Qwen/Qwen2-7B-Instruct", + "remote_code": True, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "chatglm3-6b": { + "model_id": "THUDM/chatglm3-6b", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "tokenizer_kwargs": {"add_special_tokens": False}, + "stop_tokens": [0, 2], + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" + + """ + 问题: {input} + 已知内容: {context} + 回答: + """, + }, + "baichuan2-7b-chat": { + "model_id": "baichuan-inc/Baichuan2-7B-Chat", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "tokenizer_kwargs": {"add_special_tokens": False}, + "stop_tokens": [0, 2], + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" + + """ + 问题: {input} + 已知内容: {context} + 回答: + """, + }, + "minicpm-2b-dpo": { + "model_id": "openbmb/MiniCPM-2B-dpo-fp16", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": [2], + }, + "internlm2-chat-1.8b": { + "model_id": "internlm/internlm2-chat-1_8b", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": [2, 92542], + "partial_text_processor": internlm_partial_text_processor, + }, + "qwen1.5-1.8b-chat": { + "model_id": "Qwen/Qwen1.5-1.8B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + }, + "Japanese": { + "youri-7b-chat": { + "model_id": "rinna/youri-7b-chat", + "remote_code": False, + "start_message": f"設定: {DEFAULT_SYSTEM_PROMPT_JAPANESE}\n", + "history_template": "ユーザー: 
{user}\nシステム: {assistant}\n",
+            "current_message_template": "ユーザー: {user}\nシステム: {assistant}",
+            "tokenizer_kwargs": {"add_special_tokens": False},
+            "partial_text_processor": youri_partial_text_processor,
+        },
+    },
+}
+
+SUPPORTED_EMBEDDING_MODELS = {
+    "English": {
+        "bge-small-en-v1.5": {
+            "model_id": "BAAI/bge-small-en-v1.5",
+            "mean_pooling": False,
+            "normalize_embeddings": True,
+        },
+        "bge-large-en-v1.5": {
+            "model_id": "BAAI/bge-large-en-v1.5",
+            "mean_pooling": False,
+            "normalize_embeddings": True,
+        },
+    },
+    "Chinese": {
+        "bge-small-zh-v1.5": {
+            "model_id": "BAAI/bge-small-zh-v1.5",
+            "mean_pooling": False,
+            "normalize_embeddings": True,
+        },
+        "bge-large-zh-v1.5": {
+            "model_id": "BAAI/bge-large-zh-v1.5",
+            "mean_pooling": False,
+            "normalize_embeddings": True,
+        },
+    },
+}
+
+
+SUPPORTED_RERANK_MODELS = {
+    "bge-reranker-large": {"model_id": "BAAI/bge-reranker-large"},
+    "bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"},
+}
diff --git a/EdgeCraftRAG/ui/gradio/default.yaml b/EdgeCraftRAG/ui/gradio/default.yaml
new file mode 100644
index 0000000000..1421da8f47
--- /dev/null
+++ b/EdgeCraftRAG/ui/gradio/default.yaml
@@ -0,0 +1,49 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Model language for LLM
+model_language: "Chinese"
+vector_db: "FAISS"
+splitter_name: "RecursiveCharacter"
+k_rerank: 5
+search_method: "similarity"
+score_threshold: 0.5
+bm25_weight: 0
+
+# Pipeline
+name: "default"
+
+# Node parser
+node_parser: "simple"
+chunk_size: 192
+chunk_overlap: 48
+
+# Indexer
+indexer: "faiss_vector"
+
+# Retriever
+retriever: "vectorsimilarity"
+k_retrieval: 30
+
+# Post Processor
+postprocessor: "reranker"
+
+# Generator
+generator: "local"
+prompt_path: "./data/default_prompt.txt"
+
+# Models
+embedding_model_id: "BAAI/bge-small-en-v1.5"
+embedding_model_path: "./bge_ov_embedding"
+# Device for embedding model inference
+embedding_device: "AUTO"
+
+rerank_model_id: "BAAI/bge-reranker-large"
+rerank_model_path: "./bge_ov_reranker"
+# Device for reranking model inference
+rerank_device: "AUTO"
+
+llm_model_id: "qwen2-7b-instruct"
+llm_model_path: "./qwen2-7b-instruct/INT4_compressed_weights"
+# Device for LLM model inference
+llm_device: "AUTO"
diff --git a/EdgeCraftRAG/ui/gradio/ecrag_client.py b/EdgeCraftRAG/ui/gradio/ecrag_client.py
new file mode 100644
index 0000000000..47b5f776d7
--- /dev/null
+++ b/EdgeCraftRAG/ui/gradio/ecrag_client.py
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import sys
+
+import requests
+
+sys.path.append("..")
+import os
+
+from edgecraftrag import api_schema
+
+PIPELINE_SERVICE_HOST_IP = os.getenv("PIPELINE_SERVICE_HOST_IP", "127.0.0.1")
+PIPELINE_SERVICE_PORT = int(os.getenv("PIPELINE_SERVICE_PORT", 16010))
+server_addr = f"http://{PIPELINE_SERVICE_HOST_IP}:{PIPELINE_SERVICE_PORT}"
+
+
+def get_current_pipelines():
+    res = requests.get(f"{server_addr}/v1/settings/pipelines", proxies={"http": None})
+    pls = []
+    for pl in res.json():
+        if pl["status"]["active"]:
+            pls.append((pl["idx"], pl["name"] + " (active)"))
+        else:
+            pls.append((pl["idx"], pl["name"]))
+    return pls
+
+
+def get_pipeline(name):
+    res = requests.get(f"{server_addr}/v1/settings/pipelines/{name}", proxies={"http": None})
+    return res.json()
+
+
+def create_update_pipeline(
+    name,
+    active,
+    node_parser,
+    chunk_size,
+    chunk_overlap,
+    indexer,
+    retriever,
+    vector_search_top_k,
+    postprocessor,
+    generator,
+    llm_id,
+    llm_device,
+    llm_weights,
+    embedding_id,
+
embedding_device, + rerank_id, + rerank_device, +): + req_dict = api_schema.PipelineCreateIn( + name=name, + active=active, + node_parser=api_schema.NodeParserIn( + parser_type=node_parser, chunk_size=chunk_size, chunk_overlap=chunk_overlap + ), + indexer=api_schema.IndexerIn( + indexer_type=indexer, + embedding_model=api_schema.ModelIn( + model_id=embedding_id, + # TODO: remove hardcoding + model_path="./bge_ov_embedding", + device=embedding_device, + ), + ), + retriever=api_schema.RetrieverIn(retriever_type=retriever, retriever_topk=vector_search_top_k), + postprocessor=[ + api_schema.PostProcessorIn( + processor_type=postprocessor[0], + reranker_model=api_schema.ModelIn( + model_id=rerank_id, + # TODO: remove hardcoding + model_path="./bge_ov_reranker", + device=rerank_device, + ), + ) + ], + generator=api_schema.GeneratorIn( + # TODO: remove hardcoding + prompt_path="./edgecraftrag/prompt_template/default_prompt.txt", + model=api_schema.ModelIn( + model_id=llm_id, + # TODO: remove hardcoding + model_path="./models/qwen2-7b-instruct/INT4_compressed_weights", + device=llm_device, + ), + ), + ) + # hard code only for test + print(req_dict) + res = requests.post(f"{server_addr}/v1/settings/pipelines", json=req_dict.dict(), proxies={"http": None}) + return res.text + + +def activate_pipeline(name): + active_dict = {"active": "True"} + res = requests.patch(f"{server_addr}/v1/settings/pipelines/{name}", json=active_dict, proxies={"http": None}) + status = False + restext = f"Activate pipeline {name} failed." + if res.ok: + status = True + restext = f"Activate pipeline {name} successfully." + return restext, status + + +def create_vectordb(docs, spliter, vector_db): + req_dict = api_schema.FilesIn(local_paths=docs) + res = requests.post(f"{server_addr}/v1/data/files", json=req_dict.dict(), proxies={"http": None}) + return res.text + + +def get_files(): + res = requests.get(f"{server_addr}/v1/data/files", proxies={"http": None}) + files = [] + for file in res.json(): + files.append((file["file_name"], file["file_id"])) + return files + + +def delete_file(file_name_or_id): + res = requests.delete(f"{server_addr}/v1/data/files/{file_name_or_id}", proxies={"http": None}) + return res.text diff --git a/EdgeCraftRAG/ui/gradio/ecragui.py b/EdgeCraftRAG/ui/gradio/ecragui.py new file mode 100644 index 0000000000..3c198bf2a9 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/ecragui.py @@ -0,0 +1,983 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import json +import platform +import re +from datetime import datetime +from pathlib import Path + +import cpuinfo +import distro # if running Python 3.8 or above +import ecrag_client as cli +import gradio as gr +import httpx + +# Creation of the ModelLoader instance and loading models remain the same +import platform_config as pconf +import psutil +import requests +from loguru import logger +from omegaconf import OmegaConf +from platform_config import get_available_devices, get_available_weights, get_local_available_models + +pipeline_df = [] + +import os + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "127.0.0.1") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 16011)) +UI_SERVICE_HOST_IP = os.getenv("UI_SERVICE_HOST_IP", "0.0.0.0") +UI_SERVICE_PORT = int(os.getenv("UI_SERVICE_PORT", 8084)) + + +def get_llm_model_dir(llm_model_id, weights_compression): + model_dirs = { + "fp16_model_dir": Path(llm_model_id) / "FP16", + "int8_model_dir": Path(llm_model_id) / "INT8_compressed_weights", + 
"int4_model_dir": Path(llm_model_id) / "INT4_compressed_weights", + } + + if weights_compression == "INT4": + model_dir = model_dirs["int4_model_dir"] + elif weights_compression == "INT8": + model_dir = model_dirs["int8_model_dir"] + else: + model_dir = model_dirs["fp16_model_dir"] + + if not model_dir.exists(): + raise FileNotFoundError(f"The model directory {model_dir} does not exist.") + elif not model_dir.is_dir(): + raise NotADirectoryError(f"The path {model_dir} is not a directory.") + + return model_dir + + +def get_system_status(): + cpu_usage = psutil.cpu_percent(interval=1) + memory_info = psutil.virtual_memory() + memory_usage = memory_info.percent + memory_total_gb = memory_info.total / (1024**3) + memory_used_gb = memory_info.used / (1024**3) + # uptime_seconds = time.time() - psutil.boot_time() + # uptime_hours, uptime_minutes = divmod(uptime_seconds // 60, 60) + disk_usage = psutil.disk_usage("/").percent + # net_io = psutil.net_io_counters() + os_info = platform.uname() + kernel_version = os_info.release + processor = cpuinfo.get_cpu_info()["brand_raw"] + dist_name = distro.name(pretty=True) + + now = datetime.now() + current_time_str = now.strftime("%Y-%m-%d %H:%M") + + status = ( + f"{current_time_str} \t" + f"CPU Usage: {cpu_usage}% \t" + f"Memory Usage: {memory_usage}% {memory_used_gb:.2f}GB / {memory_total_gb:.2f}GB \t" + # f"System Uptime: {int(uptime_hours)} hours, {int(uptime_minutes)} minutes \t" + f"Disk Usage: {disk_usage}% \t" + # f"Bytes Sent: {net_io.bytes_sent}\n" + # f"Bytes Received: {net_io.bytes_recv}\n" + f"Kernel: {kernel_version} \t" + f"Processor: {processor} \t" + f"OS: {dist_name} \n" + ) + return status + + +def build_demo(cfg, args): + + def load_chatbot_models( + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ): + req_dict = { + "llm_id": llm_id, + "llm_device": llm_device, + "llm_weights": llm_weights, + "embedding_id": embedding_id, + "embedding_device": embedding_device, + "rerank_id": rerank_id, + "rerank_device": rerank_device, + } + # hard code only for test + worker_addr = "http://127.0.0.1:8084" + print(req_dict) + result = requests.post(f"{worker_addr}/load", json=req_dict, proxies={"http": None}) + return result.text + + def user(message, history): + """Callback function for updating user messages in interface on submit button click. + + Params: + message: current message + history: conversation history + Returns: + None + """ + # Append the user's message to the conversation history + return "", history + [[message, ""]] + + async def bot( + history, + temperature, + top_p, + top_k, + repetition_penalty, + hide_full_prompt, + do_rag, + docs, + spliter_name, + vector_db, + chunk_size, + chunk_overlap, + vector_search_top_k, + vector_search_top_n, + run_rerank, + search_method, + score_threshold, + ): + """Callback function for running chatbot on submit button click. + + Params: + history: conversation history + temperature: parameter for control the level of creativity in AI-generated text. + By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. + top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. + top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. 
+            repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text.
+        """
+        # req_dict = {
+        #     "history": history,
+        #     "temperature": temperature,
+        #     "top_p": top_p,
+        #     "top_k": top_k,
+        #     "repetition_penalty": repetition_penalty,
+        #     "hide_full_prompt": hide_full_prompt,
+        #     "do_rag": do_rag,
+        #     "docs": docs,
+        #     "spliter_name": spliter_name,
+        #     "vector_db": vector_db,
+        #     "chunk_size": chunk_size,
+        #     "chunk_overlap": chunk_overlap,
+        #     "vector_search_top_k": vector_search_top_k,
+        #     "vector_search_top_n": vector_search_top_n,
+        #     "run_rerank": run_rerank,
+        #     "search_method": search_method,
+        #     "score_threshold": score_threshold,
+        #     "streaming": True
+        # }
+        print(history)
+        new_req = {"messages": history[-1][0]}
+        server_addr = f"http://{MEGA_SERVICE_HOST_IP}:{MEGA_SERVICE_PORT}"
+
+        # Async for streaming response
+        partial_text = ""
+        async with httpx.AsyncClient() as client:
+            async with client.stream("POST", f"{server_addr}/v1/chatqna", json=new_req, timeout=None) as response:
+                partial_text = ""
+                async for chunk in response.aiter_lines():
+                    new_text = chunk
+                    if new_text.startswith("data"):
+                        # Strip the SSE "data: " prefix before parsing the JSON payload
+                        new_text = re.sub(r"\r\n", "", chunk.split("data: ")[-1])
+                        new_text = json.loads(new_text)["choices"][0]["message"]["content"]
+                        partial_text = partial_text + new_text
+                        history[-1][1] = partial_text
+                        yield history
+
+    avail_llms = get_local_available_models("llm")
+    avail_embed_models = get_local_available_models("embed")
+    avail_rerank_models = get_local_available_models("rerank")
+    avail_devices = get_available_devices()
+    avail_weights_compression = get_available_weights()
+    avail_node_parsers = pconf.get_available_node_parsers()
+    avail_indexers = pconf.get_available_indexers()
+    avail_retrievers = pconf.get_available_retrievers()
+    avail_postprocessors = pconf.get_available_postprocessors()
+    avail_generators = pconf.get_available_generators()
+
+    css = """
+    .feedback textarea {font-size: 18px !important}
+    #blude_border {border: 1px solid #0000FF}
+    #white_border {border: 2px solid #FFFFFF}
+    .test textarea {color: #E0E0FF; border: 1px solid #0000FF}
+    .disclaimer {font-variant-caps: all-small-caps}
+    """
+
+    with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
+        gr.HTML(
+            """
+            <div style="text-align: center;">
+                <img src="assets/ai-logo-inline-onlight-3000.png" alt="Sample Image" style="max-width: 300px;">
+                <h1>Edge Craft RAG based Q&A Chatbot</h1>
+                <h5>Powered by Intel NEXC Edge AI solutions</h5>
+            </div>
+ + + + """ + ) + _ = gr.Textbox( + label="System Status", + value=get_system_status, + max_lines=1, + every=1, + info="", + elem_id="white_border", + ) + + def get_pipeline_df(): + global pipeline_df + pipeline_df = cli.get_current_pipelines() + return pipeline_df + + # ------------------- + # RAG Settings Layout + # ------------------- + with gr.Tab("RAG Settings"): + with gr.Row(): + with gr.Column(scale=2): + u_pipelines = gr.Dataframe( + headers=["ID", "Name"], + column_widths=[70, 30], + value=get_pipeline_df, + label="Pipelines", + show_label=True, + interactive=False, + every=5, + ) + + u_rag_pipeline_status = gr.Textbox(label="Status", value="", interactive=False) + + with gr.Column(scale=3): + with gr.Accordion("Pipeline Configuration"): + with gr.Row(): + rag_create_pipeline = gr.Button("Create Pipeline") + rag_activate_pipeline = gr.Button("Activate Pipeline") + rag_remove_pipeline = gr.Button("Remove Pipeline") + + with gr.Column(variant="panel"): + u_pipeline_name = gr.Textbox( + label="Name", + value=cfg.name, + interactive=True, + ) + u_active = gr.Checkbox( + value=True, + label="Activated", + interactive=True, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Node Parser"): + u_node_parser = gr.Dropdown( + choices=avail_node_parsers, + label="Node Parser", + value=cfg.node_parser, + info="Select a parser to split documents.", + multiselect=False, + interactive=True, + ) + u_chunk_size = gr.Slider( + label="Chunk size", + value=cfg.chunk_size, + minimum=100, + maximum=2000, + step=50, + interactive=True, + info="Size of sentence chunk", + ) + + u_chunk_overlap = gr.Slider( + label="Chunk overlap", + value=cfg.chunk_overlap, + minimum=0, + maximum=400, + step=1, + interactive=True, + info=("Overlap between 2 chunks"), + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Indexer"): + u_indexer = gr.Dropdown( + choices=avail_indexers, + label="Indexer", + value=cfg.indexer, + info="Select an indexer for indexing content of the documents.", + multiselect=False, + interactive=True, + ) + + with gr.Accordion("Embedding Model Configuration"): + u_embed_model_id = gr.Dropdown( + choices=avail_embed_models, + value=cfg.embedding_model_id, + label="Embedding Model", + # info="Select a Embedding Model", + multiselect=False, + allow_custom_value=True, + ) + + u_embed_device = gr.Dropdown( + choices=avail_devices, + value=cfg.embedding_device, + label="Embedding run device", + # info="Run embedding model on which device?", + multiselect=False, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Retriever"): + u_retriever = gr.Dropdown( + choices=avail_retrievers, + value=cfg.retriever, + label="Retriever", + info="Select a retriever for retrieving context.", + multiselect=False, + interactive=True, + ) + u_vector_search_top_k = gr.Slider( + 1, + 50, + value=cfg.k_retrieval, + step=1, + label="Search top k", + info="Number of searching results, must >= Rerank top n", + interactive=True, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Postprocessor"): + u_postprocessor = gr.Dropdown( + choices=avail_postprocessors, + value=cfg.postprocessor, + label="Postprocessor", + info="Select postprocessors for post-processing of the context.", + multiselect=True, + interactive=True, + ) + + with gr.Accordion("Rerank Model Configuration", open=True): + u_rerank_model_id = gr.Dropdown( + choices=avail_rerank_models, + value=cfg.rerank_model_id, + label="Rerank Model", + # info="Select a Rerank Model", + multiselect=False, + allow_custom_value=True, + ) + + 
u_rerank_device = gr.Dropdown( + choices=avail_devices, + value=cfg.rerank_device, + label="Rerank run device", + # info="Run rerank model on which device?", + multiselect=False, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Generator"): + u_generator = gr.Dropdown( + choices=avail_generators, + value=cfg.generator, + label="Generator", + info="Select a generator for AI inference.", + multiselect=False, + interactive=True, + ) + + with gr.Accordion("LLM Configuration", open=True): + u_llm_model_id = gr.Dropdown( + choices=avail_llms, + value=cfg.llm_model_id, + label="Large Language Model", + # info="Select a Large Language Model", + multiselect=False, + allow_custom_value=True, + ) + + u_llm_device = gr.Dropdown( + choices=avail_devices, + value=cfg.llm_device, + label="LLM run device", + # info="Run LLM on which device?", + multiselect=False, + ) + + u_llm_weights = gr.Radio( + avail_weights_compression, + label="Weights", + info="weights compression", + ) + + # ------------------- + # RAG Settings Events + # ------------------- + # Event handlers + def show_pipeline_detail(evt: gr.SelectData): + # get selected pipeline id + # Dataframe: {'headers': '', 'data': [[x00, x01], [x10, x11]} + # SelectData.index: [i, j] + print(u_pipelines.value["data"]) + print(evt.index) + # always use pipeline id for indexing + selected_id = pipeline_df[evt.index[0]][0] + pl = cli.get_pipeline(selected_id) + # TODO: change to json fomart + # pl["postprocessor"][0]["processor_type"] + # pl["postprocessor"]["model"]["model_id"], pl["postprocessor"]["model"]["device"] + return ( + pl["name"], + pl["status"]["active"], + pl["node_parser"]["parser_type"], + pl["node_parser"]["chunk_size"], + pl["node_parser"]["chunk_overlap"], + pl["indexer"]["indexer_type"], + pl["retriever"]["retriever_type"], + pl["retriever"]["retrieve_topk"], + pl["generator"]["generator_type"], + pl["generator"]["model"]["model_id"], + pl["generator"]["model"]["device"], + "", + pl["indexer"]["model"]["model_id"], + pl["indexer"]["model"]["device"], + ) + + def modify_create_pipeline_button(): + return "Create Pipeline" + + def modify_update_pipeline_button(): + return "Update Pipeline" + + def create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ): + res = cli.create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ) + return res, get_pipeline_df() + + # Events + u_pipelines.select( + show_pipeline_detail, + inputs=None, + outputs=[ + u_pipeline_name, + u_active, + # node parser + u_node_parser, + u_chunk_size, + u_chunk_overlap, + # indexer + u_indexer, + # retriever + u_retriever, + u_vector_search_top_k, + # postprocessor + # u_postprocessor, + # generator + u_generator, + # models + u_llm_model_id, + u_llm_device, + u_llm_weights, + u_embed_model_id, + u_embed_device, + # u_rerank_model_id, + # u_rerank_device + ], + ) + + u_pipeline_name.input(modify_create_pipeline_button, inputs=None, outputs=rag_create_pipeline) + + # Create pipeline button will change to update pipeline button if any + # of the listed fields changed + gr.on( + triggers=[ + u_active.input, + # node parser + 
u_node_parser.input, + u_chunk_size.input, + u_chunk_overlap.input, + # indexer + u_indexer.input, + # retriever + u_retriever.input, + u_vector_search_top_k.input, + # postprocessor + u_postprocessor.input, + # generator + u_generator.input, + # models + u_llm_model_id.input, + u_llm_device.input, + u_llm_weights.input, + u_embed_model_id.input, + u_embed_device.input, + u_rerank_model_id.input, + u_rerank_device.input, + ], + fn=modify_update_pipeline_button, + inputs=None, + outputs=rag_create_pipeline, + ) + + rag_create_pipeline.click( + create_update_pipeline, + inputs=[ + u_pipeline_name, + u_active, + u_node_parser, + u_chunk_size, + u_chunk_overlap, + u_indexer, + u_retriever, + u_vector_search_top_k, + u_postprocessor, + u_generator, + u_llm_model_id, + u_llm_device, + u_llm_weights, + u_embed_model_id, + u_embed_device, + u_rerank_model_id, + u_rerank_device, + ], + outputs=[u_rag_pipeline_status, u_pipelines], + queue=False, + ) + + rag_activate_pipeline.click( + cli.activate_pipeline, + inputs=[u_pipeline_name], + outputs=[u_rag_pipeline_status, u_active], + queue=False, + ) + + # -------------- + # Chatbot Layout + # -------------- + def get_files(): + return cli.get_files() + + def create_vectordb(docs, spliter, vector_db): + res = cli.create_vectordb(docs, spliter, vector_db) + return gr.update(value=get_files()), res + + global u_files_selected_row + u_files_selected_row = None + + def select_file(data, evt: gr.SelectData): + if not evt.selected or len(evt.index) == 0: + return "No file selected" + global u_files_selected_row + row_index = evt.index[0] + u_files_selected_row = data.iloc[row_index] + file_name, file_id = u_files_selected_row + return f"File Name: {file_name}\nFile ID: {file_id}" + + def deselect_file(): + global u_files_selected_row + u_files_selected_row = None + return gr.update(value=get_files()), "Selection cleared" + + def delete_file(): + global u_files_selected_row + if u_files_selected_row is None: + res = "Please select a file first." 
+ else: + file_name, file_id = u_files_selected_row + u_files_selected_row = None + res = cli.delete_file(file_id) + return gr.update(value=get_files()), res + + with gr.Tab("Chatbot"): + with gr.Row(): + with gr.Column(scale=1): + docs = gr.File( + label="Step 1: Load text files", + file_count="multiple", + file_types=[ + ".csv", + ".doc", + ".docx", + ".enex", + ".epub", + ".html", + ".md", + ".odt", + ".pdf", + ".ppt", + ".pptx", + ".txt", + ], + ) + retriever_argument = gr.Accordion("Vector Store Configuration", open=False) + with retriever_argument: + spliter = gr.Dropdown( + ["Character", "RecursiveCharacter", "Markdown", "Chinese"], + value=cfg.splitter_name, + label="Text Spliter", + info="Method used to split the documents", + multiselect=False, + ) + + vector_db = gr.Dropdown( + ["FAISS", "Chroma"], + value=cfg.vector_db, + label="Vector Stores", + info="Stores embedded data and performs vector search.", + multiselect=False, + ) + load_docs = gr.Button("Upload files") + + u_files_status = gr.Textbox(label="File Processing Status", value="", interactive=False) + u_files = gr.Dataframe( + headers=["Loaded File Name", "File ID"], + value=get_files, + label="Loaded Files", + show_label=False, + interactive=False, + every=5, + ) + + with gr.Accordion("Delete File", open=False): + selected_files = gr.Textbox(label="Click file to select", value="", interactive=False) + with gr.Row(): + with gr.Column(): + delete_button = gr.Button("Delete Selected File") + with gr.Column(): + deselect_button = gr.Button("Clear Selection") + + do_rag = gr.Checkbox( + value=True, + label="RAG is ON", + interactive=True, + info="Whether to do RAG for generation", + ) + with gr.Accordion("Generation Configuration", open=False): + with gr.Row(): + with gr.Column(): + with gr.Row(): + temperature = gr.Slider( + label="Temperature", + value=0.1, + minimum=0.0, + maximum=1.0, + step=0.1, + interactive=True, + info="Higher values produce more diverse outputs", + ) + with gr.Column(): + with gr.Row(): + top_p = gr.Slider( + label="Top-p (nucleus sampling)", + value=1.0, + minimum=0.0, + maximum=1, + step=0.01, + interactive=True, + info=( + "Sample from the smallest possible set of tokens whose cumulative probability " + "exceeds top_p. Set to 1 to disable and sample from all tokens." 
+ ), + ) + with gr.Column(): + with gr.Row(): + top_k = gr.Slider( + label="Top-k", + value=50, + minimum=0.0, + maximum=200, + step=1, + interactive=True, + info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.", + ) + with gr.Column(): + with gr.Row(): + repetition_penalty = gr.Slider( + label="Repetition Penalty", + value=1.1, + minimum=1.0, + maximum=2.0, + step=0.1, + interactive=True, + info="Penalize repetition — 1.0 to disable.", + ) + with gr.Column(scale=4): + chatbot = gr.Chatbot( + height=600, + label="Step 2: Input Query", + show_copy_button=True, + ) + with gr.Row(): + with gr.Column(): + msg = gr.Textbox( + label="QA Message Box", + placeholder="Chat Message Box", + show_label=False, + container=False, + ) + with gr.Column(): + with gr.Row(): + submit = gr.Button("Submit") + stop = gr.Button("Stop") + clear = gr.Button("Clear") + retriever_argument = gr.Accordion("Retriever Configuration", open=True) + with retriever_argument: + with gr.Row(): + with gr.Row(): + do_rerank = gr.Checkbox( + value=True, + label="Rerank searching result", + interactive=True, + ) + hide_context = gr.Checkbox( + value=True, + label="Hide searching result in prompt", + interactive=True, + ) + with gr.Row(): + search_method = gr.Dropdown( + ["similarity_score_threshold", "similarity", "mmr"], + value=cfg.search_method, + label="Searching Method", + info="Method used to search vector store", + multiselect=False, + interactive=True, + ) + with gr.Row(): + score_threshold = gr.Slider( + 0.01, + 0.99, + value=cfg.score_threshold, + step=0.01, + label="Similarity Threshold", + info="Only working for 'similarity score threshold' method", + interactive=True, + ) + with gr.Row(): + vector_rerank_top_n = gr.Slider( + 1, + 10, + value=cfg.k_rerank, + step=1, + label="Rerank top n", + info="Number of rerank results", + interactive=True, + ) + load_docs.click( + create_vectordb, + inputs=[ + docs, + spliter, + vector_db, + ], + outputs=[u_files, u_files_status], + queue=True, + ) + # TODO: Need to de-select the dataframe, + # otherwise every time the dataframe is updated, a select event is triggered + u_files.select(select_file, inputs=[u_files], outputs=selected_files, queue=True) + + delete_button.click( + delete_file, + outputs=[u_files, u_files_status], + queue=True, + ) + deselect_button.click( + deselect_file, + outputs=[u_files, selected_files], + queue=True, + ) + + submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, + [ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + hide_context, + do_rag, + docs, + spliter, + vector_db, + u_chunk_size, + u_chunk_overlap, + u_vector_search_top_k, + vector_rerank_top_n, + do_rerank, + search_method, + score_threshold, + ], + chatbot, + queue=True, + ) + submit_click_event = submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, + [ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + hide_context, + do_rag, + docs, + spliter, + vector_db, + u_chunk_size, + u_chunk_overlap, + u_vector_search_top_k, + vector_rerank_top_n, + do_rerank, + search_method, + score_threshold, + ], + chatbot, + queue=True, + ) + # stop.click( + # fn=request_cancel, + # inputs=None, + # outputs=None, + # cancels=[submit_event, submit_click_event], + # queue=False, + # ) + clear.click(lambda: None, None, chatbot, queue=False) + return demo + + +def main(): + # Create the parser + parser = argparse.ArgumentParser(description="Load Embedding and LLM Models with 
OpenVino.") + # Add the arguments + parser.add_argument("--prompt_template", type=str, required=False, help="User specific template") + # parser.add_argument("--server_name", type=str, default="0.0.0.0") + # parser.add_argument("--server_port", type=int, default=8082) + parser.add_argument("--config", type=str, default="./default.yaml", help="configuration file path") + parser.add_argument("--share", action="store_true", help="share model") + parser.add_argument("--debug", action="store_true", help="enable debugging") + + # Execute the parse_args() method to collect command line arguments + args = parser.parse_args() + logger.info(args) + cfg = OmegaConf.load(args.config) + init_cfg_(cfg) + logger.info(cfg) + + demo = build_demo(cfg, args) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # if you have any issue to launch on your platform, you can pass share=True to launch method: + # demo.launch(share=True) + # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ + # demo.launch(share=True) + demo.queue().launch( + server_name=UI_SERVICE_HOST_IP, server_port=UI_SERVICE_PORT, share=args.share, allowed_paths=["."] + ) + + # %% + # please run this cell for stopping gradio interface + demo.close() + + +def init_cfg_(cfg): + if "name" not in cfg: + cfg.name = "default" + if "embedding_device" not in cfg: + cfg.embedding_device = "CPU" + if "rerank_device" not in cfg: + cfg.rerank_device = "CPU" + if "llm_device" not in cfg: + cfg.llm_device = "CPU" + if "model_language" not in cfg: + cfg.model_language = "Chinese" + if "vector_db" not in cfg: + cfg.vector_db = "FAISS" + if "splitter_name" not in cfg: + cfg.splitter_name = "RecursiveCharacter" # or "Chinese" + if "search_method" not in cfg: + cfg.search_method = "similarity" + if "score_threshold" not in cfg: + cfg.score_threshold = 0.5 + + +if __name__ == "__main__": + main() diff --git a/EdgeCraftRAG/ui/gradio/platform_config.py b/EdgeCraftRAG/ui/gradio/platform_config.py new file mode 100644 index 0000000000..852409c1c0 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/platform_config.py @@ -0,0 +1,114 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +from enum import Enum + +import openvino.runtime as ov +from config import SUPPORTED_EMBEDDING_MODELS, SUPPORTED_LLM_MODELS, SUPPORTED_RERANK_MODELS + +sys.path.append("..") +from edgecraftrag.base import GeneratorType, IndexerType, NodeParserType, PostProcessorType, RetrieverType + + +def _get_llm_model_ids(supported_models, model_language=None): + if model_language is None: + model_ids = [model_id for model_id, _ in supported_models.items()] + return model_ids + + if model_language not in supported_models: + print("Invalid model language! Please choose from the available options.") + return None + + # Create a list of model IDs based on the selected language + llm_model_ids = [ + model_id + for model_id, model_config in supported_models[model_language].items() + if model_config.get("rag_prompt_template") or model_config.get("normalize_embeddings") + ] + + return llm_model_ids + + +def _list_subdirectories(parent_directory): + """List all subdirectories under the given parent directory using os.listdir. + + Parameters: + parent_directory (str): The path to the parent directory from which to list subdirectories. + + Returns: + list: A list of subdirectory names found in the parent directory. 
+ """ + # Get a list of all entries in the parent directory + entries = os.listdir(parent_directory) + + # Filter out the entries to only keep directories + subdirectories = [entry for entry in entries if os.path.isdir(os.path.join(parent_directory, entry))] + + return sorted(subdirectories) + + +def _get_available_models(model_ids, local_dirs): + """Filters and sorts model IDs based on their presence in the local directories. + + Parameters: + model_ids (list): A list of model IDs to check. + local_dirs (list): A list of local directory names to check against. + + Returns: + list: A sorted list of available model IDs. + """ + # Filter model_ids for those that are present in local directories + return sorted([model_id for model_id in model_ids if model_id in local_dirs]) + + +def get_local_available_models(model_type: str, local_path: str = "./"): + local_dirs = _list_subdirectories(local_path) + if model_type == "llm": + model_ids = _get_llm_model_ids(SUPPORTED_LLM_MODELS, "Chinese") + elif model_type == "embed": + model_ids = _get_llm_model_ids(SUPPORTED_EMBEDDING_MODELS, "Chinese") + elif model_type == "rerank": + model_ids = _get_llm_model_ids(SUPPORTED_RERANK_MODELS) + else: + print("Unknown model type") + avail_models = _get_available_models(model_ids, local_dirs) + return avail_models + + +def get_available_devices(): + core = ov.Core() + avail_devices = core.available_devices + ["AUTO"] + if "NPU" in avail_devices: + avail_devices.remove("NPU") + return avail_devices + + +def get_available_weights(): + avail_weights_compression = ["FP16", "INT8", "INT4"] + return avail_weights_compression + + +def get_enum_values(c: Enum): + return [v.value for k, v in vars(c).items() if not callable(v) and not k.startswith("__") and not k.startswith("_")] + + +def get_available_node_parsers(): + return get_enum_values(NodeParserType) + + +def get_available_indexers(): + return get_enum_values(IndexerType) + + +def get_available_retrievers(): + return get_enum_values(RetrieverType) + + +def get_available_postprocessors(): + return get_enum_values(PostProcessorType) + + +def get_available_generators(): + return get_enum_values(GeneratorType) diff --git a/FaqGen/Dockerfile b/FaqGen/Dockerfile index 08307f0046..4018b44d1f 100644 --- a/FaqGen/Dockerfile +++ b/FaqGen/Dockerfile @@ -19,7 +19,7 @@ WORKDIR /home/user/ RUN git clone https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps -RUN pip install --no-cache-dir --upgrade pip && \ +RUN pip install --no-cache-dir --upgrade pip setuptools && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt COPY ./faqgen.py /home/user/faqgen.py diff --git a/FaqGen/benchmark/accuracy/evaluate.py b/FaqGen/benchmark/accuracy/evaluate.py index 30998da4dd..da75502ce0 100644 --- a/FaqGen/benchmark/accuracy/evaluate.py +++ b/FaqGen/benchmark/accuracy/evaluate.py @@ -35,7 +35,7 @@ contexts.append([inputs_faq]) embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5") -metrics_faq = ["answer_relevancy", "faithfulness", "context_utilization", "reference_free_rubrics_score"] +metrics_faq = ["answer_relevancy", "faithfulness", "context_utilization", "rubrics_score_without_reference"] metric = RagasMetric(threshold=0.5, model=llm_endpoint, embeddings=embeddings, metrics=metrics_faq) test_case = {"question": question, "answer": answer, "ground_truth": ground_truth, "contexts": contexts} diff --git a/FaqGen/benchmark/accuracy/launch_tgi.sh b/FaqGen/benchmark/accuracy/launch_tgi.sh index f4ac9eade4..1a1d23ee8d 100644 --- 
a/FaqGen/benchmark/accuracy/launch_tgi.sh
+++ b/FaqGen/benchmark/accuracy/launch_tgi.sh
@@ -11,7 +11,6 @@ docker run -it --rm \
   -p $port_number:80 \
   -v $volume:/data \
   --runtime=habana \
-  --restart always \
   -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \
   -e HABANA_VISIBLE_DEVICES=all \
   -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
@@ -20,7 +19,7 @@ docker run -it --rm \
   --ipc=host \
   -e HTTPS_PROXY=$https_proxy \
   -e HTTP_PROXY=$https_proxy \
-  ghcr.io/huggingface/tgi-gaudi:2.0.5 \
+  ghcr.io/huggingface/tgi-gaudi:2.0.6 \
   --model-id $model_name \
   --max-input-tokens $max_input_tokens \
   --max-total-tokens $max_total_tokens \
diff --git a/FaqGen/benchmark/performance/README.md b/FaqGen/benchmark/performance/README.md
new file mode 100644
index 0000000000..0587a85a1e
--- /dev/null
+++ b/FaqGen/benchmark/performance/README.md
@@ -0,0 +1,77 @@
+# FaqGen Benchmarking
+
+This folder contains a collection of scripts that enable inference benchmarking with [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), a comprehensive benchmarking tool for throughput analysis of inference performance.
+
+By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
+
+## Purpose
+
+We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
+
+- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
+- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
+- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with the current leading LLMs, serving frameworks, etc.
+
+## Metrics
+
+The benchmark reports the following metrics:
+
+- Number of Concurrent Requests
+- End-to-End Latency: P50, P90, P99 (in milliseconds)
+- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
+- Average Next Token Latency (in milliseconds)
+- Average Token Latency (in milliseconds)
+- Requests Per Second (RPS)
+- Output Tokens Per Second
+- Input Tokens Per Second
+
+Results are displayed in the terminal and saved as a CSV file named `1_testspec.yaml`.
+
+## Getting Started
+
+We recommend using Kubernetes to deploy the FaqGen service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs.
+
+### Prerequisites
+
+- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
+
+- Ensure every node has direct internet access.
+- Set up kubectl on the master node with access to the Kubernetes cluster.
+- Install Python 3.8+ on the master node for running GenAIEval.
+- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
+- Ensure that the container's ulimit is large enough for the expected number of requests.
+
+```bash
+# To modify the ulimit of containers managed by containerd:
+sudo systemctl edit containerd
+# Add the following two lines:
+[Service]
+LimitNOFILE=65536:1048576
+
+sudo systemctl daemon-reload; sudo systemctl restart containerd
+```
+
+### Test Steps
+
+Please deploy the FaqGen service before benchmarking; a reference deployment sketch follows below.
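+
+If the service is not yet deployed, the sketch below shows one way to do it with the Kubernetes manifests shipped in this repository (see `FaqGen/kubernetes/intel/README.md`); it assumes your `kubectl` context already points at the target cluster and that you substitute your own HuggingFace token:
+
+```bash
+# Sketch: deploy FaqGen on Gaudi from the in-repo manifest
+cd GenAIExamples/FaqGen/kubernetes/intel/hpu/gaudi/manifest
+export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
+sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqgen.yaml
+kubectl apply -f faqgen.yaml
+# Wait until all pods are ready before starting the benchmark
+kubectl get pods -w
+```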
+
+#### Run Benchmark Test
+
+Before running the benchmark, configure the number of test queries and the test output directory:
+
+```bash
+export USER_QUERIES="[1, 1, 1, 1]"
+export TEST_OUTPUT_DIR="/tmp/benchmark_output"
+```
+
+Then run the benchmark:
+
+```bash
+bash benchmark.sh -n <node_number>
+```
+
+The argument `-n` specifies the number of test nodes.
+
+#### Data Collection
+
+All test results are written to the folder `/tmp/benchmark_output`, as configured by the environment variable `TEST_OUTPUT_DIR` in the previous steps.
diff --git a/FaqGen/benchmark/performance/benchmark.sh b/FaqGen/benchmark/performance/benchmark.sh
new file mode 100644
index 0000000000..44abdecbb1
--- /dev/null
+++ b/FaqGen/benchmark/performance/benchmark.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+deployment_type="k8s"
+node_number=1
+service_port=8888
+query_per_node=128
+
+benchmark_tool_path="$(pwd)/GenAIEval"
+
+usage() {
+  echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]"
+  echo "  -d deployment_type    deployment type, select between k8s and docker (default: ${deployment_type})"
+  echo "  -n node_number        Test node number, required only for k8s deployment_type, (default: ${node_number})"
+  echo "  -i service_ip         service ip, required only for docker deployment_type"
+  echo "  -p service_port       service port, required only for docker deployment_type, (default: ${service_port})"
+  exit 1
+}
+
+while getopts ":d:n:i:p:" opt; do
+  case ${opt} in
+    d )
+      deployment_type=$OPTARG
+      ;;
+    n )
+      node_number=$OPTARG
+      ;;
+    i )
+      service_ip=$OPTARG
+      ;;
+    p )
+      service_port=$OPTARG
+      ;;
+    \? )
+      echo "Invalid option: -$OPTARG" 1>&2
+      usage
+      ;;
+    : )
+      echo "Invalid option: -$OPTARG requires an argument" 1>&2
+      usage
+      ;;
+  esac
+done
+
+if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then
+  echo "Error: service_ip is required for docker deployment_type" 1>&2
+  usage
+fi
+
+if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then
+  echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
+fi
+
+function main() {
+  if [[ ! -d ${benchmark_tool_path} ]]; then
+    echo "Benchmark tool not found, setting up..."
+    setup_env
+  fi
+  run_benchmark
+}
+
+function setup_env() {
+  git clone https://github.com/opea-project/GenAIEval.git
+  pushd ${benchmark_tool_path}
+  python3 -m venv stress_venv
+  source stress_venv/bin/activate
+  pip install -r requirements.txt
+  popd
+}
+
+function run_benchmark() {
+  source ${benchmark_tool_path}/stress_venv/bin/activate
+  export DEPLOYMENT_TYPE=${deployment_type}
+  export SERVICE_IP=${service_ip:-"None"}
+  export SERVICE_PORT=${service_port:-"None"}
+  if [[ -z $USER_QUERIES ]]; then
+    user_query=$((query_per_node*node_number))
+    export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
+    echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
+  fi
+  export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//')
+  if [[ -z $WARMUP ]]; then export WARMUP=0; fi
+  if [[ -z $TEST_OUTPUT_DIR ]]; then
+    if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
+      export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
+    else
+      export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
+    fi
+    echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
+  fi
+
+  envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml
+  cd ${benchmark_tool_path}/evals/benchmark
+  python benchmark.py
+}
+
+main
diff --git a/FaqGen/benchmark/performance/benchmark.yaml b/FaqGen/benchmark/performance/benchmark.yaml
new file mode 100644
index 0000000000..2c9c914de3
--- /dev/null
+++ b/FaqGen/benchmark/performance/benchmark.yaml
@@ -0,0 +1,47 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+test_suite_config: # Overall configuration settings for the test suite
+  examples: ["faqgen"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
+  deployment_type: "k8s" # Default is "k8s", can also be "docker"
+  service_ip: None # Leave as None for k8s, specify for Docker
+  service_port: None # Leave as None for k8s, specify for Docker
+  warm_ups: 0 # Number of test requests for warm-up
+  run_time: 60m # The max total run time for the test suite
+  seed: # The seed for all RNGs
+  user_queries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] # Number of test requests at each concurrency level
+  query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by default.
+  random_prompt: false # Use random prompts if true, fixed prompts if false
+  collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
+  data_visualization: false # Generate data visualization if true, do not generate data visualization if false
+  llm_model: "meta-llama/Meta-Llama-3-8B-Instruct" # The LLM model used for the test
+  test_output_dir: "/tmp/benchmark_output" # The directory to store the test output
+  load_shape: # Tenant concurrency pattern
+    name: constant # poisson or constant (locust default load shape)
+    params: # Load-shape-specific parameters
+      constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
+        concurrent_level: 4 # If user_queries is specified, concurrent_level is the target number of requests per user. If not, it is the number of simulated users
+        # arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden and a constant load will be generated based on arrival_rate
+      poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
+        arrival_rate: 1.0 # Request arrival rate
+  namespace: "" # Fill in the user-defined namespace. Otherwise, the default namespace will be used.
+ +test_cases: + faqgen: + llm: + run_test: false + service_name: "faq-tgi-svc" # Replace with your service name + parameters: + model_name: "meta-llama/Meta-Llama-3-8B-Instruct" + max_new_tokens: 128 + temperature: 0.01 + top_k: 10 + top_p: 0.95 + repetition_penalty: 1.03 + streaming: true + llmserve: + run_test: false + service_name: "faq-micro-svc" # Replace with your service name + e2e: + run_test: true + service_name: "faq-mega-server-svc" # Replace with your service name diff --git a/FaqGen/docker_compose/intel/cpu/xeon/README.md b/FaqGen/docker_compose/intel/cpu/xeon/README.md index 04fea0f859..2ed343e2ef 100644 --- a/FaqGen/docker_compose/intel/cpu/xeon/README.md +++ b/FaqGen/docker_compose/intel/cpu/xeon/README.md @@ -79,6 +79,7 @@ export TGI_LLM_ENDPOINT="http://${your_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_PORT=9000 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/faqgen" ``` @@ -114,9 +115,11 @@ docker compose up -d 3. MegaService ```bash - curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - }' + curl http://${host_ip}:8888/v1/faqgen \ + -H "Content-Type: multipart/form-data" \ + -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ + -F "max_tokens=32" \ + -F "stream=false" ``` Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service. diff --git a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml index 8c5c894aed..18a6a7ec35 100644 --- a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-xeon-server ports: - "8008:80" @@ -46,6 +46,7 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - LLM_SERVICE_PORT=${LLM_SERVICE_PORT} ipc: host restart: always faqgen-xeon-ui-server: @@ -59,7 +60,7 @@ services: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + - FAQ_BASE_URL=${BACKEND_SERVICE_ENDPOINT} ipc: host restart: always networks: diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/README.md b/FaqGen/docker_compose/intel/hpu/gaudi/README.md index acdded9c2c..81473e49c2 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/README.md +++ b/FaqGen/docker_compose/intel/hpu/gaudi/README.md @@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally. This step can be ignored As TGI Gaudi has been officially published as a Docker image, we simply need to pull it: ```bash -docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 +docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 ``` ### 2. 
Build LLM Image @@ -28,7 +28,7 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op ```bash git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/FaqGen/docker/ +cd GenAIExamples/FaqGen/ docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . ``` @@ -37,7 +37,7 @@ docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_pro Construct the frontend Docker image using the command below: ```bash -cd GenAIExamples/FaqGen/ +cd GenAIExamples/FaqGen/ui docker build -t opea/faqgen-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` @@ -53,7 +53,7 @@ docker build -t opea/faqgen-react-ui:latest --build-arg https_proxy=$https_proxy Then run the command `docker images`, you will have the following Docker Images: -1. `ghcr.io/huggingface/tgi-gaudi:2.0.5` +1. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 2. `opea/llm-faqgen-tgi:latest` 3. `opea/faqgen:latest` 4. `opea/faqgen-ui:latest` @@ -80,6 +80,7 @@ export TGI_LLM_ENDPOINT="http://${your_ip}:8008" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_PORT=9000 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/faqgen" ``` @@ -115,9 +116,11 @@ docker compose up -d 3. MegaService ```bash - curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - }' + curl http://${host_ip}:8888/v1/faqgen \ + -H "Content-Type: multipart/form-data" \ + -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." 
\ + -F "max_tokens=32" \ + -F "stream=false" ``` ## 🚀 Launch the UI diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml index 1ee36bd302..f810319f0e 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 container_name: tgi-gaudi-server ports: - "8008:80" @@ -56,6 +56,7 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - LLM_SERVICE_PORT=${LLM_SERVICE_PORT} ipc: host restart: always faqgen-gaudi-ui-server: @@ -69,7 +70,7 @@ services: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + - FAQ_BASE_URL=${BACKEND_SERVICE_ENDPOINT} ipc: host restart: always diff --git a/FaqGen/faqgen.yaml b/FaqGen/faqgen.yaml index 8d354871e0..5b924a38eb 100644 --- a/FaqGen/faqgen.yaml +++ b/FaqGen/faqgen.yaml @@ -6,7 +6,7 @@ opea_micro_services: tgi-service: host: ${TGI_SERVICE_IP} ports: ${TGI_SERVICE_PORT} - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 volumes: - "./data:/data" runtime: habana diff --git a/FaqGen/kubernetes/intel/README.md b/FaqGen/kubernetes/intel/README.md index 461941b33a..42c099ef45 100644 --- a/FaqGen/kubernetes/intel/README.md +++ b/FaqGen/kubernetes/intel/README.md @@ -17,7 +17,7 @@ If use gated models, you also need to provide [huggingface token](https://huggin ## Deploy On Xeon ``` -cd GenAIExamples/FaqGen/kubernetes/intel/cpu/xeon/manifests +cd GenAIExamples/FaqGen/kubernetes/intel/cpu/xeon/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqgen.yaml kubectl apply -f faqgen.yaml @@ -26,7 +26,7 @@ kubectl apply -f faqgen.yaml ## Deploy On Gaudi ``` -cd GenAIExamples/FaqGen/kubernetes/intel/hpu/gaudi/manifests +cd GenAIExamples/FaqGen/kubernetes/intel/hpu/gaudi/manifest export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" faqgen.yaml kubectl apply -f faqgen.yaml diff --git a/FaqGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md b/FaqGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md index ff768c4acd..2e0ffcdb40 100644 --- a/FaqGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md +++ b/FaqGen/kubernetes/intel/cpu/xeon/manifest/README_react_ui.md @@ -16,7 +16,7 @@ Before deploying the react-faqgen.yaml file, ensure that you have the following ``` # You may set the HUGGINGFACEHUB_API_TOKEN via method: export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" - cd GenAIExamples/FaqGen/kubernetes/intel/cpu/xeon/manifests/ui/ + cd GenAIExamples/FaqGen/kubernetes/intel/cpu/xeon/manifest/ui/ sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" react-faqgen.yaml ``` b. 
Set the proxies based on your network configuration diff --git a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml index 845ba50412..4577372495 100644 --- a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml +++ b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml @@ -126,7 +126,7 @@ spec: - name: no_proxy value: securityContext: {} - image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu" + image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /data @@ -264,7 +264,7 @@ spec: containers: - name: faqgen-react-ui env: - - name: DOC_BASE_URL + - name: FAQ_BASE_URL value: "http://faqgen:8888/v1/faqgen" - name: http_proxy value: diff --git a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_ui.yaml b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_ui.yaml index f74299a094..6b531a0c78 100644 --- a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_ui.yaml +++ b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_ui.yaml @@ -22,7 +22,7 @@ spec: containers: - name: faq-mega-ui-deploy env: - - name: DOC_BASE_URL + - name: FAQ_BASE_URL value: http://{insert_your_ip_here}:7779/v1/faqgen image: opea/faqgen-ui:latest imagePullPolicy: IfNotPresent diff --git a/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml b/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml index 2703cbc4ef..a9b8ef199e 100644 --- a/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml +++ b/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen.yaml @@ -47,7 +47,7 @@ spec: value: 'true' - name: FLASH_ATTENTION_RECOMPUTE value: 'true' - image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + image: ghcr.io/huggingface/tgi-gaudi:2.0.6 imagePullPolicy: IfNotPresent securityContext: capabilities: diff --git a/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen_ui.yaml b/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen_ui.yaml index f74299a094..6b531a0c78 100644 --- a/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen_ui.yaml +++ b/FaqGen/kubernetes/intel/hpu/gaudi/manifest/faqgen_ui.yaml @@ -22,7 +22,7 @@ spec: containers: - name: faq-mega-ui-deploy env: - - name: DOC_BASE_URL + - name: FAQ_BASE_URL value: http://{insert_your_ip_here}:7779/v1/faqgen image: opea/faqgen-ui:latest imagePullPolicy: IfNotPresent diff --git a/FaqGen/tests/test_compose_on_gaudi.sh b/FaqGen/tests/test_compose_on_gaudi.sh index a583397801..dc12dfde8a 100644 --- a/FaqGen/tests/test_compose_on_gaudi.sh +++ b/FaqGen/tests/test_compose_on_gaudi.sh @@ -22,7 +22,7 @@ function build_docker_images() { service_list="faqgen faqgen-ui llm-faqgen-tgi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5 + docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker images && sleep 1s } @@ -34,6 +34,7 @@ function start_services() { export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_PORT=9000 export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/faqgen" sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -101,13 +102,30 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/faqgen" \ - "Text Embeddings Inference" \ - "mega-faqgen" \ - "faqgen-gaudi-backend-server" \ - 
'{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + local SERVICE_NAME="mega-faqgen" + local DOCKER_NAME="faqgen-gaudi-backend-server" + local EXPECTED_RESULT="Embeddings" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${ip_address}:8888/v1/faqgen" + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s } function validate_frontend() { @@ -152,7 +170,7 @@ function main() { validate_microservices validate_megaservice - validate_frontend + # validate_frontend stop_docker echo y | docker system prune diff --git a/FaqGen/tests/test_compose_on_xeon.sh b/FaqGen/tests/test_compose_on_xeon.sh index c6265e02d8..3dbde68283 100755 --- a/FaqGen/tests/test_compose_on_xeon.sh +++ b/FaqGen/tests/test_compose_on_xeon.sh @@ -34,6 +34,7 @@ function start_services() { export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} export LLM_SERVICE_HOST_IP=${ip_address} + export LLM_SERVICE_PORT=9000 export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/faqgen" sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env @@ -101,13 +102,30 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/faqgen" \ - "Text Embeddings Inference" \ - "mega-faqgen" \ - "faqgen-xeon-backend-server" \ - '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + local SERVICE_NAME="mega-faqgen" + local DOCKER_NAME="faqgen-xeon-backend-server" + local EXPECTED_RESULT="Embeddings" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${ip_address}:8888/v1/faqgen" + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ + local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s } function validate_frontend() { @@ -152,7 +170,7 @@ function main() { validate_microservices validate_megaservice - validate_frontend + # validate_frontend stop_docker echo y | docker system prune diff --git a/FaqGen/ui/docker/Dockerfile b/FaqGen/ui/docker/Dockerfile index ac2bb7da31..1d5115f4b5 100644 --- a/FaqGen/ui/docker/Dockerfile +++ b/FaqGen/ui/docker/Dockerfile @@ -23,4 +23,4 @@ RUN npm run build EXPOSE 5173 # Run the front-end application in preview mode -CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] \ No newline at end of file +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/FaqGen/ui/docker/Dockerfile.react b/FaqGen/ui/docker/Dockerfile.react index 4e29136a6d..734c6ba1dd 100644 --- a/FaqGen/ui/docker/Dockerfile.react +++ b/FaqGen/ui/docker/Dockerfile.react @@ -17,4 +17,4 @@ EXPOSE 80 COPY --from=vite-app /usr/app/react/nginx.conf /etc/nginx/conf.d/default.conf COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html -ENTRYPOINT ["nginx", "-g", "daemon off;"] \ No newline at end of file +ENTRYPOINT ["nginx", "-g", "daemon off;"] diff --git a/FaqGen/ui/react/nginx.conf b/FaqGen/ui/react/nginx.conf index 00433fcda7..01aef12751 100644 --- a/FaqGen/ui/react/nginx.conf +++ b/FaqGen/ui/react/nginx.conf @@ -17,4 +17,4 @@ server { expires 1d; } } -} \ No newline at end of file +} diff --git a/FaqGen/ui/react/public/vite.svg b/FaqGen/ui/react/public/vite.svg index e7b8dfb1b2..ee9fadaf9c 100644 --- a/FaqGen/ui/react/public/vite.svg +++ b/FaqGen/ui/react/public/vite.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/FaqGen/ui/react/src/assets/react.svg b/FaqGen/ui/react/src/assets/react.svg index 6c87de9bb3..8e0e0f15c0 100644 --- a/FaqGen/ui/react/src/assets/react.svg +++ b/FaqGen/ui/react/src/assets/react.svg @@ -1 +1 @@ - \ No newline at end of file + diff --git a/FaqGen/ui/react/src/components/FaqGen/FaqGen.tsx b/FaqGen/ui/react/src/components/FaqGen/FaqGen.tsx index ca731cbf8b..74f52d3ef3 100644 --- a/FaqGen/ui/react/src/components/FaqGen/FaqGen.tsx +++ b/FaqGen/ui/react/src/components/FaqGen/FaqGen.tsx @@ -164,4 +164,4 @@ const FaqGen = () => { ) } -export default FaqGen; \ No newline at end of file +export default FaqGen; diff --git a/FaqGen/ui/react/src/components/FaqGen/FileUpload.tsx b/FaqGen/ui/react/src/components/FaqGen/FileUpload.tsx index 914ac87241..aa5d84a00f 100644 --- a/FaqGen/ui/react/src/components/FaqGen/FileUpload.tsx +++ b/FaqGen/ui/react/src/components/FaqGen/FileUpload.tsx @@ -76,4 +76,4 @@ export function FileUpload(props: Partial) { ); -} \ No newline at end of file +} diff --git a/FaqGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx b/FaqGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx index 479034cece..a21f7acc59 100644 --- a/FaqGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx +++ b/FaqGen/ui/react/src/components/Shared/CodeRender/CodeRender.tsx @@ -49,4 +49,4 @@ 
const CodeRender = ({ cleanCode, language, inline }:CodeRenderProps) => { } -export default CodeRender; \ No newline at end of file +export default CodeRender; diff --git a/FaqGen/ui/react/src/components/Shared/Markdown/Markdown.tsx b/FaqGen/ui/react/src/components/Shared/Markdown/Markdown.tsx index 64f624bc6e..653ac4b025 100644 --- a/FaqGen/ui/react/src/components/Shared/Markdown/Markdown.tsx +++ b/FaqGen/ui/react/src/components/Shared/Markdown/Markdown.tsx @@ -58,4 +58,4 @@ const Markdown = ({ content }: MarkdownProps) => { />) } -export default Markdown; \ No newline at end of file +export default Markdown; diff --git a/FaqGen/ui/svelte/.env b/FaqGen/ui/svelte/.env index bfdca1c9a1..4d0880c767 100644 --- a/FaqGen/ui/svelte/.env +++ b/FaqGen/ui/svelte/.env @@ -1 +1 @@ -DOC_BASE_URL = 'http://backend_address:8888/v1/faqgen' +FAQ_BASE_URL = 'http://backend_address:8888/v1/faqgen' diff --git a/FaqGen/ui/svelte/src/lib/doc.svelte b/FaqGen/ui/svelte/src/lib/doc.svelte index bae896ba3c..f9ea335843 100644 --- a/FaqGen/ui/svelte/src/lib/doc.svelte +++ b/FaqGen/ui/svelte/src/lib/doc.svelte @@ -38,8 +38,8 @@ } else { currentIdx = index; if ( - (currentIdx === 1 && message !== "") || - (currentIdx === 2 && $kb_id !== "") + (currentIdx === 2 && message !== "") || + (currentIdx === 1 && $kb_id !== "") ) { formModal = true; } else { @@ -49,10 +49,10 @@ } function panelExchange() { - if (currentIdx === 2) { + if (currentIdx === 1) { kb_id.set(""); dispatch("clearMsg", { status: true }); - } else if (currentIdx === 1) { + } else if (currentIdx === 2) { message = ""; dispatch("clearMsg", { status: true }); } @@ -152,7 +152,7 @@ type="submit" data-testid="sum-click" class="xl:my-12 inline-flex items-center px-5 py-2.5 text-sm font-medium text-center text-white bg-blue-700 mt-2 focus:ring-4 focus:ring-blue-200 dark:focus:ring-blue-900 hover:bg-blue-800" - on:click={() => generateFaq()} + on:click={() => generateFaq()} > Generate FAQs @@ -165,11 +165,11 @@ /> {#if currentIdx === 1}

- The current content will be cleared.
+ The currently uploaded file will be cleared.

{:else if currentIdx === 2}

- The currently uploaded file will be cleared.
+ The current content will be cleared.
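<!-- Index mapping assumed from panelExchange above: panel 1 holds the uploaded
     file and panel 2 the pasted content, so each branch names what it clears. -->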

{/if} diff --git a/FaqGen/ui/svelte/src/lib/dropFile.svelte b/FaqGen/ui/svelte/src/lib/dropFile.svelte index ef52ca1d0c..fcc972c543 100644 --- a/FaqGen/ui/svelte/src/lib/dropFile.svelte +++ b/FaqGen/ui/svelte/src/lib/dropFile.svelte @@ -15,30 +15,36 @@ --> + + diff --git a/GraphRAG/ui/react/nginx.conf b/GraphRAG/ui/react/nginx.conf new file mode 100644 index 0000000000..01aef12751 --- /dev/null +++ b/GraphRAG/ui/react/nginx.conf @@ -0,0 +1,20 @@ +server { + listen 80; + + gzip on; + gzip_proxied any; + gzip_comp_level 6; + gzip_buffers 16 8k; + gzip_http_version 1.1; + gzip_types font/woff2 text/css application/javascript application/json application/font-woff application/font-tff image/gif image/png image/svg+xml application/octet-stream; + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html =404; + + location ~* \.(gif|jpe?g|png|webp|ico|svg|css|js|mp4|woff2)$ { + expires 1d; + } + } +} diff --git a/GraphRAG/ui/react/package.json b/GraphRAG/ui/react/package.json new file mode 100644 index 0000000000..3760ed909b --- /dev/null +++ b/GraphRAG/ui/react/package.json @@ -0,0 +1,47 @@ +{ + "name": "ui", + "private": true, + "version": "0.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0", + "preview": "vite preview", + "test": "vitest" + }, + "dependencies": { + "@mantine/core": "^7.10.0", + "@mantine/hooks": "^7.10.0", + "@mantine/notifications": "^7.10.2", + "@microsoft/fetch-event-source": "^2.0.1", + "@reduxjs/toolkit": "^2.2.5", + "@tabler/icons-react": "^3.5.0", + "axios": "^1.7.2", + "luxon": "^3.4.4", + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-redux": "^9.1.2" + }, + "devDependencies": { + "@testing-library/react": "^16.0.0", + "@types/luxon": "^3.4.2", + "@types/node": "^20.12.12", + "@types/react": "^18.2.66", + "@types/react-dom": "^18.2.22", + "@typescript-eslint/eslint-plugin": "^7.2.0", + "@typescript-eslint/parser": "^7.2.0", + "@vitejs/plugin-react": "^4.2.1", + "eslint": "^8.57.0", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-react-refresh": "^0.4.6", + "jsdom": "^24.1.0", + "postcss": "^8.4.38", + "postcss-preset-mantine": "^1.15.0", + "postcss-simple-vars": "^7.0.1", + "sass": "1.64.2", + "typescript": "^5.2.2", + "vite": "^5.2.13", + "vitest": "^1.6.0" + } +} diff --git a/GraphRAG/ui/react/postcss.config.cjs b/GraphRAG/ui/react/postcss.config.cjs new file mode 100644 index 0000000000..e817f567be --- /dev/null +++ b/GraphRAG/ui/react/postcss.config.cjs @@ -0,0 +1,14 @@ +module.exports = { + plugins: { + "postcss-preset-mantine": {}, + "postcss-simple-vars": { + variables: { + "mantine-breakpoint-xs": "36em", + "mantine-breakpoint-sm": "48em", + "mantine-breakpoint-md": "62em", + "mantine-breakpoint-lg": "75em", + "mantine-breakpoint-xl": "88em", + }, + }, + }, +}; diff --git a/GraphRAG/ui/react/public/vite.svg b/GraphRAG/ui/react/public/vite.svg new file mode 100644 index 0000000000..ee9fadaf9c --- /dev/null +++ b/GraphRAG/ui/react/public/vite.svg @@ -0,0 +1 @@ + diff --git a/GraphRAG/ui/react/src/App.scss b/GraphRAG/ui/react/src/App.scss new file mode 100644 index 0000000000..187764a179 --- /dev/null +++ b/GraphRAG/ui/react/src/App.scss @@ -0,0 +1,42 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +@import "./styles/styles"; + +.root { + @include flex(row, nowrap, flex-start, flex-start); +} + +.layout-wrapper { + @include 
absolutes; + + display: grid; + + width: 100%; + height: 100%; + + grid-template-columns: 80px auto; + grid-template-rows: 1fr; +} + +/* ===== Scrollbar CSS ===== */ +/* Firefox */ +* { + scrollbar-width: thin; + scrollbar-color: #d6d6d6 #ffffff; +} + +/* Chrome, Edge, and Safari */ +*::-webkit-scrollbar { + width: 8px; +} + +*::-webkit-scrollbar-track { + background: #ffffff; +} + +*::-webkit-scrollbar-thumb { + background-color: #d6d6d6; + border-radius: 16px; + border: 4px double #dedede; +} diff --git a/GraphRAG/ui/react/src/App.tsx b/GraphRAG/ui/react/src/App.tsx new file mode 100644 index 0000000000..4be4fa5bb5 --- /dev/null +++ b/GraphRAG/ui/react/src/App.tsx @@ -0,0 +1,34 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import "./App.scss" +import { MantineProvider } from "@mantine/core" +import '@mantine/notifications/styles.css'; +import { SideNavbar, SidebarNavList } from "./components/sidebar/sidebar" +import { IconMessages } from "@tabler/icons-react" +import UserInfoModal from "./components/UserInfoModal/UserInfoModal" +import Conversation from "./components/Conversation/Conversation" +import { Notifications } from '@mantine/notifications'; + +const title = "Chat QnA" +const navList: SidebarNavList = [ + { icon: IconMessages, label: title } +] + +function App() { + + return ( + + + +
+ +
+ +
+
+
+ ) +} + +export default App diff --git a/GraphRAG/ui/react/src/__tests__/util.test.ts b/GraphRAG/ui/react/src/__tests__/util.test.ts new file mode 100644 index 0000000000..e67ba2c86a --- /dev/null +++ b/GraphRAG/ui/react/src/__tests__/util.test.ts @@ -0,0 +1,14 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, test } from "vitest"; +import { getCurrentTimeStamp, uuidv4 } from "../common/util"; + +describe("unit tests", () => { + test("check UUID is of length 36", () => { + expect(uuidv4()).toHaveLength(36); + }); + test("check TimeStamp generated is of unix", () => { + expect(getCurrentTimeStamp()).toBe(Math.floor(Date.now() / 1000)); + }); +}); diff --git a/GraphRAG/ui/react/src/assets/opea-icon-black.svg b/GraphRAG/ui/react/src/assets/opea-icon-black.svg new file mode 100644 index 0000000000..5c96dc7622 --- /dev/null +++ b/GraphRAG/ui/react/src/assets/opea-icon-black.svg @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/GraphRAG/ui/react/src/assets/opea-icon-color.svg b/GraphRAG/ui/react/src/assets/opea-icon-color.svg new file mode 100644 index 0000000000..790151171e --- /dev/null +++ b/GraphRAG/ui/react/src/assets/opea-icon-color.svg @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/GraphRAG/ui/react/src/assets/react.svg b/GraphRAG/ui/react/src/assets/react.svg new file mode 100644 index 0000000000..8e0e0f15c0 --- /dev/null +++ b/GraphRAG/ui/react/src/assets/react.svg @@ -0,0 +1 @@ + diff --git a/GraphRAG/ui/react/src/common/client.ts b/GraphRAG/ui/react/src/common/client.ts new file mode 100644 index 0000000000..7512f73e33 --- /dev/null +++ b/GraphRAG/ui/react/src/common/client.ts @@ -0,0 +1,8 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import axios from "axios"; + +//add iterceptors to add any request headers + +export default axios; diff --git a/GraphRAG/ui/react/src/common/util.ts b/GraphRAG/ui/react/src/common/util.ts new file mode 100644 index 0000000000..df65b2d8e0 --- /dev/null +++ b/GraphRAG/ui/react/src/common/util.ts @@ -0,0 +1,12 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +export const getCurrentTimeStamp = () => { + return Math.floor(Date.now() / 1000); +}; + +export const uuidv4 = () => { + return "10000000-1000-4000-8000-100000000000".replace(/[018]/g, (c) => + (+c ^ (crypto.getRandomValues(new Uint8Array(1))[0] & (15 >> (+c / 4)))).toString(16), + ); +}; diff --git a/GraphRAG/ui/react/src/components/Conversation/Conversation.tsx b/GraphRAG/ui/react/src/components/Conversation/Conversation.tsx new file mode 100644 index 0000000000..02736d8bd6 --- /dev/null +++ b/GraphRAG/ui/react/src/components/Conversation/Conversation.tsx @@ -0,0 +1,156 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import { KeyboardEventHandler, SyntheticEvent, useEffect, useRef, useState } from 'react' +import styleClasses from "./conversation.module.scss" +import { ActionIcon, Group, Textarea, Title, rem } from '@mantine/core' +import { IconArrowRight, IconFilePlus, IconMessagePlus } from '@tabler/icons-react' +import { conversationSelector, doConversation, newConversation } from '../../redux/Conversation/ConversationSlice' +import { ConversationMessage } from '../Message/conversationMessage' +import { useAppDispatch, useAppSelector } from '../../redux/store' +import { Message, MessageRole } from '../../redux/Conversation/Conversation' +import { 
getCurrentTimeStamp } from '../../common/util' +import { useDisclosure } from '@mantine/hooks' +import DataSource from './DataSource' +import { ConversationSideBar } from './ConversationSideBar' + +type ConversationProps = { + title:string +} + +const Conversation = ({ title }: ConversationProps) => { + + const [prompt, setPrompt] = useState("") + const promptInputRef = useRef(null) + const [fileUploadOpened, { open: openFileUpload, close: closeFileUpload }] = useDisclosure(false); + + const { conversations, onGoingResult, selectedConversationId } = useAppSelector(conversationSelector) + const dispatch = useAppDispatch(); + const selectedConversation = conversations.find(x=>x.conversationId===selectedConversationId) + + const scrollViewport = useRef(null) + + const toSend = "Enter" + + const systemPrompt: Partial = { + role: MessageRole.System, + content: "You are helpful assistant", + }; + + + const handleSubmit = () => { + + const userPrompt: Message = { + role: MessageRole.User, + content: prompt, + time: getCurrentTimeStamp() + }; + let messages: Partial[] = []; + if(selectedConversation){ + messages = selectedConversation.Messages.map(message => { + return {role:message.role, content:message.content} + }) + } + + messages = [systemPrompt, ...messages] + + doConversation({ + conversationId: selectedConversationId, + userPrompt, + messages, + model: "Intel/neural-chat-7b-v3-3", + }) + setPrompt("") + } + + const scrollToBottom = () => { + scrollViewport.current!.scrollTo({ top: scrollViewport.current!.scrollHeight }) + } + + useEffect(() => { + scrollToBottom() + }, [onGoingResult, selectedConversation?.Messages]) + + const handleKeyDown: KeyboardEventHandler = (event) => { + if (!event.shiftKey && event.key === toSend) { + handleSubmit() + setTimeout(() => { + setPrompt("") + }, 1) + } + } + + + + const handleNewConversation = () => { + dispatch(newConversation()) + } + + const handleChange = (event: SyntheticEvent) => { + event.preventDefault() + setPrompt((event.target as HTMLTextAreaElement).value) + } + return ( +
+ +
+
+
+ {selectedConversation?.title || ""} + + + {selectedConversation && selectedConversation?.Messages.length > 0 && ( + + + + )} + + + + +
+ +
+ + {!selectedConversation && ( + <> +
Start by asking a question
+
You can also upload your document by clicking on the document icon in the top right corner
+ + )} + + {selectedConversation?.Messages.map((message) => { + return () + }) + } + + {onGoingResult && ( + + )} +
+ +
+