{
  "name": "Content-Intelligence-data-curation-bruno-collection",
  "version": "1",
  "items": [
    {
      "type": "folder",
      "name": "Authentication",
      "filename": "Authentication",
      "examples": [],
      "root": {
        "docs": "A request for  getting an access token from the Hyland Authorization Service for use on other requests.",
        "meta": {
          "name": "Authentication"
        }
      },
      "items": [
        {
          "type": "http",
          "name": "Get Access Token",
          "filename": "Get Access Token.bru",
          "seq": 1,
          "settings": {},
          "tags": [],
          "examples": [],
          "request": {
            "url": "{{OAuth_Host}}/connect/token",
            "method": "POST",
            "headers": [],
            "params": [],
            "body": {
              "mode": "formUrlEncoded",
              "formUrlEncoded": [
                {
                  "name": "grant_type",
                  "value": "urn:hyland:params:oauth:grant-type:api-credentials",
                  "enabled": true
                },
                {
                  "name": "client_id",
                  "value": "{{client_id}}",
                  "enabled": true
                },
                {
                  "name": "client_secret",
                  "value": "{{client_secret}}",
                  "enabled": true
                },
                {
                  "name": "scope",
                  "value": "environment_authorization",
                  "enabled": true
                }
              ],
              "multipartForm": [],
              "file": []
            },
            "script": {
              "res": "let data = res.body\nbru.setEnvVar(\"token\", data.access_token)"
            },
            "vars": {},
            "assertions": [],
            "tests": "",
            "docs": "A query for retrieving an access token from the Hyland Authorization Server.\n\nRelies on environment values for `client_id` and `client_secret` being configured to authorize the user.\n\nHas a post response script for storing the access_token on the response to the `token` environment variable for use in other requests for simplicity.",
            "auth": {
              "mode": "none"
            }
          }
        }
      ]
    },
    {
      "type": "folder",
      "name": "S3 Operations",
      "filename": "S3 Operations",
      "examples": [],
      "root": {
        "docs": "Examples of querying S3 for uploading documents to be processed and downloading the resulting output when a job is complete.\n\nAs we are using pre-signed URLs no additional authorization is required.",
        "meta": {
          "name": "S3 Operations"
        }
      },
      "items": [
        {
          "type": "http",
          "name": "Download Processed Output",
          "filename": "Download Processed Output.bru",
          "seq": 2,
          "settings": {},
          "tags": [],
          "examples": [],
          "request": {
            "url": "{{most_recent_get_url}}",
            "method": "GET",
            "headers": [],
            "params": [],
            "body": {
              "mode": "none",
              "formUrlEncoded": [],
              "multipartForm": [],
              "file": []
            },
            "script": {},
            "vars": {},
            "assertions": [],
            "tests": "",
            "docs": "Request for downloading files from S3 for jobs processed by the Data Curation pipeline.\n\nAutomatically makes use of the returned get url from Initiate Pipeline/Generate Pre-sign URLs.\n\nJob should be in `Done` status before this request is made.",
            "auth": {
              "mode": "none"
            }
          }
        },
        {
          "type": "http",
          "name": "Upload File For Processing",
          "filename": "Upload File For Processing.bru",
          "seq": 1,
          "settings": {},
          "tags": [],
          "examples": [],
          "request": {
            "url": "{{most_recent_put_url}}",
            "method": "PUT",
            "headers": [],
            "params": [],
            "body": {
              "mode": "file",
              "formUrlEncoded": [],
              "multipartForm": [],
              "file": [
                {
                  "filePath": "",
                  "contentType": "",
                  "selected": true
                }
              ]
            },
            "script": {
              "req": "  // AWS S3 presigned URLs include a Content-Type in their signature.\n// The upload request MUST use the same Content-Type, or S3 will reject it.\n// Our presigned URL is generated with \"application/octet-stream\" (generic binary).\n// This override prevents Bruno from auto-detecting a different Content-Type based on the file extension.\nreq.setHeader(\"Content-Type\", \"application/octet-stream\")"
            },
            "vars": {},
            "assertions": [],
            "tests": "",
            "docs": "Request for uploading files to S3 for processing in the Data Curation pipeline.\n\nAutomatically makes use of the returned put url from `Initiate Pipeline/Generate Pre-sign URLs`.\n\nPre-Script should set the correct header for upload requests.",
            "auth": {
              "mode": "none"
            }
          }
        }
      ]
    },
    {
      "type": "folder",
      "name": "Status Check",
      "filename": "Status Check",
      "examples": [],
      "root": {
        "docs": "Requests for getting the status of a job based on the job_id.\n\nrequires an authentication token from the Hyland Authorization Service which can be requested using `Authentication/Get Access Token` in this collection.",
        "meta": {
          "name": "Status Check"
        }
      },
      "items": [
        {
          "type": "http",
          "name": "Get Status For Job",
          "filename": "Get Status For Job.bru",
          "seq": 1,
          "settings": {},
          "tags": [],
          "examples": [],
          "request": {
            "url": "{{Api_Host}}/status/job_id",
            "method": "GET",
            "headers": [],
            "params": [],
            "body": {
              "mode": "none",
              "formUrlEncoded": [],
              "multipartForm": [],
              "file": []
            },
            "script": {},
            "vars": {},
            "assertions": [],
            "tests": "",
            "docs": "Retrieves the status for the provided `job_id`. \n\nRequires a `token` from the Hyland Authorization Service, using `Authentication/Get Access Token` which the request automatically makes use of.",
            "auth": {
              "mode": "inherit"
            }
          }
        },
        {
          "type": "http",
          "name": "Most Recent Job - Get Status",
          "filename": "Most Recent Job - Get Status.bru",
          "seq": 3,
          "settings": {},
          "tags": [],
          "examples": [],
          "request": {
            "url": "{{Api_Host}}/status/{{most_recent_job}}",
            "method": "GET",
            "headers": [],
            "params": [],
            "body": {
              "mode": "none",
              "formUrlEncoded": [],
              "multipartForm": [],
              "file": []
            },
            "script": {},
            "vars": {},
            "assertions": [],
            "tests": "",
            "docs": "The same request as `Status Check/Get Status For Request`, but makes use of a stored environment variable for the job_id.\n\nMakes use of the `most_recent_job` variable stored by the `Initiate Piepline/Generate Pre-sign URLs` request's post response script. Provides an example of how you might use variables between requests.",
            "auth": {
              "mode": "inherit"
            }
          }
        }
      ]
    },
    {
      "type": "folder",
      "name": "Initiate Pipeline",
      "filename": "Initiate Pipeline",
      "examples": [],
      "root": {
        "docs": "Request for initializing the Data Curation pipeline for a job.\n\nRequest returns a `job_id`, `put_url`, and `get_url` that can be used in other requests for retrieving status and uploading inputs and downloading outputs.\n\nOnce the pre-signed URLs are requested, the job will not run until a file is uploaded to the `put_url` bucket in S3.",
        "meta": {
          "name": "Initiate Pipeline"
        }
      },
      "items": [
        {
          "type": "http",
          "name": "Generate Pre-sign URLs",
          "filename": "Generate Pre-sign URLs.bru",
          "seq": 1,
          "settings": {},
          "tags": [],
          "examples": [],
          "request": {
            "url": "{{Api_Host}}/presign",
            "method": "POST",
            "headers": [],
            "params": [],
            "body": {
              "mode": "json",
              "json": "{\n  \"normalization\": {\n    \"quotations\": true,\n    \"dashes\": true\n  },\n  \"pii\": {\n    \"mode\": \"detection\",\n    \"entity_redaction\": false\n  },\n  \"chunking\": true,\n  \"embedding\": true,\n  \"vector_config\": {\n    \"embedding_model\": \"text-embedding-3-large\",\n    \"chunk_size\": 1000\n  },\n  \"json_schema\": \"PIPELINE\"\n}",
              "formUrlEncoded": [],
              "multipartForm": [],
              "file": []
            },
            "script": {
              "res": "var data = res.body\nbru.setEnvVar('most_recent_job',data.job_id)\nbru.setEnvVar('most_recent_put_url', data.put_url)\nbru.setEnvVar('most_recent_get_url', data.get_url)"
            },
            "vars": {},
            "assertions": [],
            "tests": "",
            "docs": "Requests pre-sign URLs for a Data Curation job. The first step in getting documents processed by the pipeline.\n\nRequires an authorization token which can be retrieved from `Authentication/Get Access Token`.\n\n## Configuration Options\n\nAll options are optional. Defaults are applied for any missing values.\n\n**normalization** (object):\n- `quotations` (boolean): Normalize quotation marks. Default: false\n- `dashes` (boolean): Normalize dashes. Default: false\n\n**pii** (object or false):\n- Set to `false` to disable PII processing (default)\n- Or provide an object with:\n  - `mode` (string): Either \"detection\" or \"redaction\"\n  - `entity_redaction` (boolean): Whether to redact named entities. Default: false\n\n**chunking** (boolean): Enable text chunking. Default: false\n\n**embedding** (boolean): Enable embeddings generation. Default: false\n\n**vector_config** (object): Configuration for embeddings\n- `embedding_model` (string): Name of the embedding model to use\n- `chunk_size` (integer): Size of text chunks (must be within model's max_chunk_size)\n\n**embeddings_model** (string): Alternative to `vector_config.embedding_model` (top-level)\n\n**json_schema** (string or false): Output format. Valid values: \"FULL\", \"MDAST\", \"PIPELINE\", or false\n- If not provided or false, json_schema will not be included in output\n\n## Returns\n\nReturns a `job_id`, `put_url`, and `get_url` which a post response script will store in your environment variables as `most_recent_job`, `most_recent_put_url`, and `most_recent_get_url` for use in other requests.\n\nProcessing on a job begins once a file is uploaded to the `put_url` S3 bucket.",
            "auth": {
              "mode": "inherit"
            }
          }
        }
      ]
    }
  ],
  "activeEnvironmentUid": "3lUHBWrvBORvoC7n6Oub8",
  "environments": [
    {
      "variables": [
        {
          "name": "OAuth_Host",
          "value": "https://auth.iam.experience.hyland.com/idp",
          "enabled": true,
          "secret": false,
          "type": "text"
        },
        {
          "name": "Api_Host",
          "value": "https://knowledge-enrichment.ai.experience.hyland.com/latest/api/data-curation",
          "enabled": true,
          "secret": false,
          "type": "text"
        },
        {
          "name": "client_id",
          "value": "",
          "enabled": true,
          "secret": true,
          "type": "text"
        },
        {
          "name": "client_secret",
          "value": "",
          "enabled": true,
          "secret": true,
          "type": "text"
        },
        {
          "name": "token",
          "value": "",
          "enabled": true,
          "secret": true,
          "type": "text"
        }
      ],
      "name": "Production"
    }
  ],
  "root": {
    "request": {
      "auth": {
        "mode": "bearer",
        "bearer": {
          "token": "{{token}}"
        }
      }
    }
  },
  "brunoConfig": {
    "version": "1",
    "name": "Contint-data-curation-bruno-collection",
    "type": "collection",
    "ignore": [
      "node_modules",
      ".git"
    ],
    "size": 0.0069599151611328125,
    "filesCount": 15
  },
  "exportedAt": "2026-03-30T17:05:27.836Z",
  "exportedUsing": "Bruno/3.0.2"
}