|
154 | 154 | }, |
155 | 155 | { |
156 | 156 | "cell_type": "code", |
157 | | - "execution_count": null, |
| 157 | + "execution_count": 3, |
158 | 158 | "metadata": { |
159 | 159 | "id": "8-Zw72vFORz_", |
160 | | - "outputId": "98b34e63-ec34-4799-adf8-fce2af428b25", |
161 | 160 | "colab": { |
162 | 161 | "base_uri": "https://localhost:8080/" |
163 | | - } |
| 162 | + }, |
| 163 | + "outputId": "98b34e63-ec34-4799-adf8-fce2af428b25" |
164 | 164 | }, |
165 | 165 | "outputs": [ |
166 | 166 | { |
|
205 | 205 | }, |
206 | 206 | { |
207 | 207 | "cell_type": "code", |
208 | | - "execution_count": null, |
| 208 | + "execution_count": 4, |
209 | 209 | "metadata": { |
210 | 210 | "id": "rJG1pVZO317x" |
211 | 211 | }, |
|
237 | 237 | }, |
238 | 238 | { |
239 | 239 | "cell_type": "code", |
240 | | - "execution_count": null, |
| 240 | + "execution_count": 5, |
241 | 241 | "metadata": { |
242 | 242 | "id": "mfgi_oR6tTIB" |
243 | 243 | }, |
|
273 | 273 | }, |
274 | 274 | { |
275 | 275 | "cell_type": "code", |
276 | | - "execution_count": null, |
| 276 | + "execution_count": 6, |
277 | 277 | "metadata": { |
278 | 278 | "id": "B2R3P8mMvK9q" |
279 | 279 | }, |
|
298 | 298 | }, |
299 | 299 | { |
300 | 300 | "cell_type": "code", |
301 | | - "execution_count": null, |
| 301 | + "execution_count": 7, |
302 | 302 | "metadata": { |
303 | 303 | "id": "fRHHTpaV4Xyo" |
304 | 304 | }, |
|
346 | 346 | }, |
347 | 347 | { |
348 | 348 | "cell_type": "code", |
349 | | - "execution_count": null, |
| 349 | + "execution_count": 8, |
350 | 350 | "metadata": { |
351 | | - "id": "Z8NvNLTfxPTf" |
| 351 | + "id": "Z8NvNLTfxPTf", |
| 352 | + "colab": { |
| 353 | + "base_uri": "https://localhost:8080/", |
| 354 | + "height": 356 |
| 355 | + }, |
| 356 | + "outputId": "f61609e6-408a-404f-eb7f-af7a992723a0" |
352 | 357 | }, |
353 | | - "outputs": [], |
| 358 | + "outputs": [ |
| 359 | + { |
| 360 | + "output_type": "error", |
| 361 | + "ename": "ValueError", |
| 362 | + "evalue": "Unsupported file location. Only GCS paths starting with 'gs://' are supported.", |
| 363 | + "traceback": [ |
| 364 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 365 | + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", |
| 366 | + "\u001b[0;32m/tmp/ipython-input-3874795059.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 17\u001b[0m \u001b[0minput_data_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"{INPUT_DATA_PATH}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m vapo_lib.validate_prompt_and_data(\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;34m\"\\n\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mSYSTEM_INSTRUCTION\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPROMPT_TEMPLATE\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0minput_data_path\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 367 | + "\u001b[0;32m/content/vapo_lib.py\u001b[0m in \u001b[0;36mvalidate_prompt_and_data\u001b[0;34m(template, dataset_path, placeholder_to_content, label_enforced)\u001b[0m\n\u001b[1;32m 159\u001b[0m ) -> None:\n\u001b[1;32m 160\u001b[0m \u001b[0;34m\"\"\"Validates the prompt template and the dataset.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 161\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 162\u001b[0m \u001b[0mplaceholder_to_content_json\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mplaceholder_to_content\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m \u001b[0mtemplate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msub\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr\"(?<!{){(?!{)(\\s*\\w+\\s*)(?<!})}(?!})\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mr\"{{\\1}}\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtemplate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 368 | + "\u001b[0;32m/content/vapo_lib.py\u001b[0m in \u001b[0;36mload_dataset\u001b[0;34m(dataset)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;34m\"\"\"Loads and parses the dataset based on its file type ('.jsonl' or '.csv').\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;31m# Load the file from GCS\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0mdata_str\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_file_from_gcs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;31m# Parse based on file type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 369 | + "\u001b[0;32m/content/vapo_lib.py\u001b[0m in \u001b[0;36mload_file_from_gcs\u001b[0;34m(dataset)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 88\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;34m\"Unsupported file location. Only GCS paths starting with 'gs://' are\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;34m\" supported.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
| 370 | + "\u001b[0;31mValueError\u001b[0m: Unsupported file location. Only GCS paths starting with 'gs://' are supported." |
| 371 | + ] |
| 372 | + } |
| 373 | + ], |
354 | 374 | "source": [ |
355 | 375 | "import datetime\n", |
356 | 376 | "import json\n", |
|
0 commit comments