Update Embeddings.ipynb to show output_dimensionality parameter. #82

Open · wants to merge 6 commits into main
149 changes: 111 additions & 38 deletions quickstarts/Embeddings.ipynb
@@ -35,18 +35,18 @@
},
shilpakancharla marked this conversation as resolved.
@markmcd (Member) commented on Apr 19, 2024

Line #12.        output_dimensionality=10)

It sounds like this change is to add this parameter but it's hidden in this code snippet comparing task types without explanation.

Maybe add a section after this block with a short note? e.g.:

## Truncating embeddings

The text-embedding-004 model also supports lower embedding dimensions. Specify output_dimensionality to truncate the output.


```python
result1 = genai.embed_content(
    model="models/text-embedding-004",
    content="Hello world")

result2 = genai.embed_content(
    model="models/text-embedding-004",
    content="Hello world",
    output_dimensionality=10)

(len(result1['embedding']), len(result2['embedding']))
```

Can we talk about the relationship between the index and specificity? It'd be great to add a statement like "When using text-embedding-004, each dimension adds diminishing value, so truncating may be effective in constrained environments." - but I haven't verified if this is true.


Reply via ReviewNB
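markmcd's diminishing-value point can at least be sketched locally without calling the API. The following is a hypothetical illustration (the vector values are made up, not real model output); it only demonstrates the mechanics of truncating an embedding and re-normalizing it, not the model's actual quality behavior:

```python
import math

# Hypothetical embedding values -- NOT real text-embedding-004 output.
embedding = [0.21, -0.14, 0.09, -0.05, 0.03, -0.02, 0.01, 0.005]

def truncate_and_renormalize(vec, dim):
    """Keep the first `dim` components and rescale to unit length.

    Re-normalizing after truncation keeps cosine-similarity
    comparisons between truncated vectors meaningful.
    """
    head = vec[:dim]
    norm = math.sqrt(sum(x * x for x in head))
    return [x / norm for x in head]

short = truncate_and_renormalize(embedding, 4)
print(len(short))                           # 4
print(round(sum(x * x for x in short), 6))  # 1.0
```

Whether the leading dimensions actually carry most of the information is a property of how the model was trained, so the claim in the comment above would still need verification.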

Collaborator (Author):
@markmcd I haven't verified it either but I think it would be a great add! @MarkDaoust what do you think?

@MarkDaoust (Contributor) commented on Apr 19, 2024

Use `output_dimensionality=4` here

Include a comment explaining it.


Reply via ReviewNB

Contributor:

(That way you wouldn't need the '... trimmed')
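A quick local illustration of that point (hypothetical values, no API call): with only four dimensions the full vector fits on one line, so the `'... TRIMMED'` string-slicing trick is no longer needed.

```python
# Hypothetical embedding values -- NOT real API output.
full_768d = [0.001 * i for i in range(768)]        # stand-in for a 768-d embedding
truncated_4d = [0.0131, -0.0087, -0.0467, 0.0007]  # stand-in for output_dimensionality=4

# A 768-d vector needs trimming to stay readable:
print(str(full_768d)[:50], '... TRIMMED]')

# A 4-d vector prints cleanly as-is:
print(truncated_4d)
```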

shilpakancharla marked this conversation as resolved.
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {
"id": "YD6urJjWGVDf"
},
"outputs": [],
"source": [
- "!pip install -U -q google.generativeai # Install the Python SDK"
+ "!pip install -q google-generativeai"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"id": "yBapI259C99C"
},
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"id": "Zey3UiYGDDzU"
},
@@ -87,42 +87,51 @@
"source": [
"## Embed content\n",
"\n",
- "Call the `embed_content` method with the `models/embedding-001` model to generate text embeddings."
+ "Call the `embed_content` method with the `models/text-embedding-004` model to generate text embeddings."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {
- "id": "J76TNa3QDwCc"
+ "id": "J76TNa3QDwCc",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "outputId": "ab2eaa5e-21b8-4ae9-db4a-a19ee008a175"
},
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
- "[0.04703258, -0.040190056, -0.029026963, -0.026809 ... TRIMMED]\n"
+ "[0.013168523, -0.008711934, -0.046782676, 0.000699 ... TRIMMED]\n"
]
}
],
"source": [
"text = \"Hello world\"\n",
- "result = genai.embed_content(model=\"models/embedding-001\", content=text)\n",
+ "result = genai.embed_content(model=\"models/text-embedding-004\", content=text)\n",
shilpakancharla marked this conversation as resolved.
"\n",
"# Print just a part of the embedding to keep the output manageable\n",
"print(str(result['embedding'])[:50], '... TRIMMED]')"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
- "id": "rU6XX33547Ll"
+ "id": "rU6XX33547Ll",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "182bec79-016c-46ed-8910-1010a8c765f1"
},
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
"768\n"
]
@@ -145,24 +154,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {
- "id": "Hzz-7Heuf4tV"
+ "id": "Hzz-7Heuf4tV",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 69
+ },
+ "outputId": "42e1ae40-fb91-47fd-84c4-cc0e1a68d467"
},
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
- "[-0.0002620658, -0.05592018, -0.012463195, -0.0206 ... TRIMMED]\n",
- "[-0.0151748555, -0.050790474, -0.032357067, -0.058 ... TRIMMED]\n",
- "[0.025271073, -0.064161226, -0.025818137, -0.00611 ... TRIMMED]\n"
+ "[-0.010632277, 0.019375855, 0.0209652, 0.000770642 ... TRIMMED]\n",
+ "[0.018467998, 0.0054281196, -0.017658804, 0.013859 ... TRIMMED]\n",
+ "[0.05808907, 0.020941721, -0.108728774, -0.0403925 ... TRIMMED]\n"
]
}
],
"source": [
"result = genai.embed_content(\n",
- " model=\"models/embedding-001\",\n",
+ " model=\"models/text-embedding-004\",\n",
" content=[\n",
" 'What is the meaning of life?',\n",
" 'How much wood would a woodchuck chuck?',\n",
Expand All @@ -178,7 +192,7 @@
"id": "sSKcLGIpo8yc"
},
"source": [
- "## Use `task_type` to provide a hint to the model how you'll use the embeddings"
+ "## Specify `task_type`"
]
},
{
@@ -187,12 +201,13 @@
"id": "bz0zq1_shk98"
},
"source": [
- "Let's look at all the parameters the `embed_content` method takes. There are four:\n",
+ "Let's look at all the parameters the `embed_content` method takes. There are five:\n",
"\n",
- "* `model`: Required. Must be `models/embedding-001`.\n",
+ "* `model`: Required. Must be `models/text-embedding-004` or `models/embedding-001`.\n",
"* `content`: Required. The content that you would like to embed.\n",
- "*`task_type`: Optional. The task type for which the embeddings will be used. See below for possible values.\n",
+ "*`task_type`: Optional. The task type for which the embeddings will be used.\n",
"* `title`: Optional. You should only set this parameter if your task type is `retrieval_document` (or `document`).\n",
+ "* `output_dimensionality`: Optional. Reduced dimension for the output embedding. If set, excessive values in the output embedding are truncated from the end. This is supported by `models/text-embedding-004`, but cannot be specified in `models/embedding-001`.\n",
"\n",
"`task_type` is an optional parameter that provides a hint to the API about how you intend to use the embeddings in your application.\n",
"\n",
@@ -203,38 +218,96 @@
"* `retrieval_document` (or `document`): The given text is a document from a corpus being searched. Optionally, also set the `title` parameter with the title of the document.\n",
"* `semantic_similarity` (or `similarity`): The given text will be used for Semantic Textual Similarity (STS).\n",
"* `classification`: The given text will be classified.\n",
- "* `clustering`: The embeddings will be used for clustering.\n"
+ "* `clustering`: The embeddings will be used for clustering.\n",
+ "* `question_answering`: The given text will be used for question answering.\n",
+ "* `fact_verification`: The given text will be used for fact verification."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {
- "id": "LFjMapMV91es"
+ "id": "LFjMapMV91es",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 52
+ },
+ "outputId": "8b534c70-b880-4614-aa90-b0b4b337d3d1"
},
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
- "[0.04703258, -0.040190056, -0.029026963, -0.026809 ... TRIMMED]\n",
- "[0.05889487, -0.004501751, -0.067298084, -0.012740 ... TRIMMED]\n"
+ "[0.013168523, -0.008711934, -0.046782676, 0.00069968984]\n",
+ "[0.023399517, -0.00854715, -0.052534223, -0.012143112]\n"
]
}
],
"source": [
"# Notice the API returns different embeddings depending on `task_type`\n",
"result1 = genai.embed_content(\n",
- " model=\"models/embedding-001\",\n",
+ " model=\"models/text-embedding-004\",\n",
" content=\"Hello world\",\n",
" output_dimensionality=4) # Set output_dimensionality to truncate the dimensions of the embeddings.\n",
"\n",
"result2 = genai.embed_content(\n",
" model=\"models/text-embedding-004\",\n",
" content=\"Hello world\",\n",
" task_type=\"document\",\n",
" output_dimensionality=4)\n",
"\n",
"print(str(result1['embedding']))\n",
"print(str(result2['embedding']))"
]
},
{
"cell_type": "markdown",
"source": [
"## Truncating embeddings\n",
"\n",
"The `text-embedding-004` model also supports lower embedding dimensions. Specify `output_dimensionality` to truncate the output."
],
"metadata": {
"id": "r0r0dt958QQg"
}
},
{
"cell_type": "code",
"source": [
"result1 = genai.embed_content(\n",
" model=\"models/text-embedding-004\",\n",
" content=\"Hello world\")\n",
"\n",
"\n",
"result2 = genai.embed_content(\n",
- " model=\"models/embedding-001\",\n",
+ " model=\"models/text-embedding-004\",\n",
" content=\"Hello world\",\n",
- " task_type=\"document\",)\n",
+ " output_dimensionality=10)\n",
"\n",
"\n",
- "print(str(result1['embedding'])[:50], '... TRIMMED]')\n",
- "print(str(result2['embedding'])[:50], '... TRIMMED]')"
+ "(len(result1['embedding']), len(result2['embedding']))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "bX_AjfMx8PvV",
"outputId": "738afb36-ae11-4aae-a3be-047a098f9559"
},
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(768, 10)"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
@@ -265,8 +338,8 @@
],
"metadata": {
"colab": {
- "name": "Embeddings.ipynb",
- "toc_visible": true
+ "toc_visible": true,
+ "provenance": []
},
"kernelspec": {
"display_name": "Python 3",
@@ -275,4 +348,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}