mindersec
diff --git a/‎cmd/dev/app/rule_type/rttst.go‎
Lines changed: 3 additions & 3 deletions b/‎cmd/dev/app/rule_type/rttst.go‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎database/migrations/000112_data_sources_structure.down.sql‎
Lines changed: 8 additions & 0 deletions b/‎database/migrations/000112_data_sources_structure.down.sql‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎database/migrations/000112_data_sources_structure.up.sql‎
Lines changed: 17 additions & 0 deletions b/‎database/migrations/000112_data_sources_structure.up.sql‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎database/query/datasources.sql‎
Lines changed: 3 additions & 3 deletions b/‎database/query/datasources.sql‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/docs/ref/proto.mdx‎
Lines changed: 1 addition & 0 deletions b/‎docs/docs/ref/proto.mdx‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/docs/understand/data_sources.md‎
Lines changed: 42 additions & 13 deletions b/‎docs/docs/understand/data_sources.md‎
Lines changed: 42 additions & 13 deletions
diff --git a/‎internal/datasources/factory.go‎
Lines changed: 3 additions & 2 deletions b/‎internal/datasources/factory.go‎
Lines changed: 3 additions & 2 deletions
@@ -190,7 +190,7 @@ func testCmdRun(cmd *cobra.Command, _ []string) error {
 		Alert:     actionOptFromString(profile.Alert, models.ActionOptOff),
 	}
 
-	dsRegistry, err := getDataSources(dataSourcefiles)
+	dsRegistry, err := getDataSources(dataSourcefiles, prov)
 	if err != nil {
 		return fmt.Errorf("error getting data sources: %w", err)
 	}
@@ -502,7 +502,7 @@ func actionOptFromString(s *string, defAction models.ActionOpt) models.ActionOpt
 	return models.ActionOptUnknown
 }
 
-func getDataSources(readers []*os.File) (*v1datasources.DataSourceRegistry, error) {
+func getDataSources(readers []*os.File, provider provifv1.Provider) (*v1datasources.DataSourceRegistry, error) {
 	reg := v1datasources.NewDataSourceRegistry()
 	for _, r := range readers {
 		fname := r.Name()
@@ -515,7 +515,7 @@ func getDataSources(readers []*os.File) (*v1datasources.DataSourceRegistry, erro
 			return nil, fmt.Errorf("error validating data source %s: %w", fname, err)
 		}
 
-		intds, err := internalds.BuildFromProtobuf(ds)
+		intds, err := internalds.BuildFromProtobuf(ds, provider)
 		if err != nil {
 			return nil, fmt.Errorf("error building data source %s: %w", fname, err)
 		}
 
@@ -0,0 +1,8 @@
+-- SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
+-- SPDX-License-Identifier: Apache-2.0
+
+BEGIN;
+
+ALTER TABLE data_sources DROP COLUMN metadata;
+
+COMMIT;
@@ -0,0 +1,17 @@
+-- SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
+-- SPDX-License-Identifier: Apache-2.0
+
+BEGIN;
+
+-- This migration adds support for storing data source metadata at
+-- the datasource level.  This is stored as JSONB using an internal
+-- schema derived from (but separate from) protobuf.  (This could
+-- also have been used for the data source type or even rule storage,
+-- but it's not worth migrating at this time.)
+--
+-- NULL is equivalent to "empty object" for migration purposes.
+
+ALTER TABLE data_sources
+    ADD COLUMN metadata JSONB DEFAULT NULL;
+
+COMMIT;
@@ -1,8 +1,8 @@
 -- CreateDataSource creates a new datasource in a given project.
 
 -- name: CreateDataSource :one
-INSERT INTO data_sources (project_id, name, display_name, subscription_id)
-VALUES ($1, $2, $3, sqlc.narg(subscription_id)) RETURNING *;
+INSERT INTO data_sources (project_id, name, display_name, subscription_id, metadata)
+VALUES ($1, $2, $3, sqlc.narg(subscription_id), sqlc.arg(metadata)::json) RETURNING *;
 
 -- AddDataSourceFunction adds a function to a datasource.
 
@@ -14,7 +14,7 @@ VALUES ($1, $2, $3, $4, $5) RETURNING *;
 
 -- name: UpdateDataSource :one
 UPDATE data_sources
-SET display_name = $3
+SET display_name = $3, metadata = sqlc.arg(metadata)::json
 WHERE id = $1 AND project_id = $2
 RETURNING *;
 
 
@@ -12,16 +12,18 @@ While providers in Minder typically create or manage entities (e.g., repositorie
 - They do **not** create entities. Data sources only enhance an entity already known to Minder.  
 - They can reference external services—for instance, pulling in vulnerability data from OSV or ClearlyDefined or a malware scanning service.  
 - They have arguments that help shape the queries or requests the data source makes against external systems (e.g., specifying the package name, ecosystem, or version).
+- They can leverage the authentication from the current Provider to fetch additional authenticated data after the initial ingestion.
 
 ---
 
 ### Why Would You Use a *data source*?
 
 You would create a data source in Minder whenever you need additional information about an entity that was not included in the initial ingest. Common scenarios include:
 
-- **Enriching dependencies**: If a provider ingests a list of dependencies from a repository, a data source can query a vulnerability database (like OSV or ClearlyDefined) to see if any are known to be risky *from a security or licensing point of view*.  
-- **Performing security checks**: A data source might call out to a malware scanner or an external REST service to verify the integrity of binaries or tarballs.  
-- **Fetching attestation data**: If you need statements of provenance or supply-chain attestations from a separate system, a data source can gather this data for your entity.  
+- **Follow-up queries**: In some cases, it may be necessary to fetch additional information to evaluate the state of the entity based on data from the initial ingestion. (For example, checking whether a workflow action has been passing after determining the relevant action.)
+- **Enriching dependencies**: If a provider ingests a list of dependencies from a repository, a data source can query a vulnerability database (like OSV or ClearlyDefined) to see if any are known to be risky *from a security or licensing point of view*.
+- **Performing security checks**: A data source might call out to a malware scanner or an external REST service to verify the integrity of binaries or tarballs.
+- **Fetching attestation data**: If you need statements of provenance or supply-chain attestations from a separate system, a data source can gather this data for your entity.
 - **Aggregating metadata from multiple sources**: For instance, combining ClearlyDefined’s scoring data with an internal database that tracks maintainers, deprecation status, or license data.
 
 Essentially, data sources let Minder orchestrate external queries that feed into policy evaluations (e.g., Rego constraints) to create richer compliance, security, or operational checks.
@@ -32,14 +34,15 @@ Essentially, data sources let Minder orchestrate external queries that feed into
 
 When you invoke a data source in a Rego policy, you typically provide a set of arguments. These arguments tell the data source *what* to fetch or *how* to fetch it.
 
-For example, consider the YAML snippet below:
+For example, consider the two YAML snippets below:
 
 ```yaml
 version: v1
 type: data-source
 name: ghapi
 context: {}
 rest:
+  providerAuth: true
   def:
     license:
       endpoint: https://api.github.com/repos/{owner}/{repo}/license
@@ -71,20 +74,46 @@ rest:
             type: string
           repo:
             type: string
+    graphql:
+      endpoint: https://api.github.com/graphql
+      method: POST
+      body_from_field: query
+      input_schema:
+        query:
+          type: object
+          properties:
+            query:
+              type: object
+          # We don't specify properties here, but a caller might use:
+          # {concat("", "repository(name:\"", repo, "\", owner:\"", owner, "\"") {rulesets(first:20) ...}}
+      fallback:
+        http_status: 200
+        body: '{results: [], error: "Error fetching data"}'
 ```
 
 #### Key Fields
 
-- **version / type / name**: Defines this resource as a data source called `ghapi`.  
-- **context**: Typically holds the project context. Here it’s `{}`, meaning it’s globally available (or within your chosen project scope).  
-- **rest**: Declares REST-based operations. Under `def`, we define three endpoints:
-  - `license` → Fetches repository license info from GitHub  
-  - `repo_config` → Fetches general repo config (e.g., visibility, description, forks, watchers)  
-  - `private_vuln_reporting` → Fetches whether the repository has private vulnerability reporting enabled  
-- **endpoint**: A template URI with placeholders for `{owner}` and `{repo}`.  
-- **parse**: Indicates the response format (`json`).  
-- **input_schema**: Uses JSON Schema to define the parameters needed by this data source in Rego. If you specify `input_schema` incorrectly, you will receive an error at runtime, helping ensure that the data you pass in matches what the data source expects.  
+- **version / type / name**: Defines this resource as a data source called `ghapi`.
+- **context**: Typically holds the project context. Here it’s `{}`, meaning it’s globally available (or within your chosen project scope).
+- **rest**: Declares REST-based operations. If `providerAuth` is set to `true`, the provider's authentication mechanism will be used if the method's endpoint matches the provider's URL. Under `def`, we define four endpoints:
+  - `license` → Fetches repository license info from GitHub
+  - `repo_config` → Fetches general repo config (e.g., visibility, description, forks, watchers)
+  - `private_vuln_reporting` → Fetches whether the repository has private vulnerability reporting enabled
+  - `graphql` → Performs a GraphQL query
+
+Each method defined in the rest endpoints has the following fields:
+
+- **endpoint**: An [RFC 6570](https://tools.ietf.org/html/rfc6570) URI template, expanded with the supplied arguments (see [Using a data source in a Rule](#using-a-data-source-in-a-rule)).
+- **method**: The HTTP method to invoke.  Defaults to `GET`.
+- **headers**: A key-value map of static headers to add to the request.
+- **bodyobj**: Specifies the request body as a static JSON object.
+- **bodystr**: Specifies the request body as a static string.
+- **body_from_field**: Specifies that the request body should be produced from the specified argument. Objects will be converted to JSON representation, while strings will be used as an exact request body.
+- **parse**: Indicates the response format (`json`). If unset, the result will be the body as a string.
+- **input_schema**: Uses JSON Schema to define the parameters needed by this data source in Rego. If you specify `input_schema` incorrectly, you will receive an error at runtime, helping ensure that the data you pass in matches what the data source expects.
   - *(Note: You can define additional properties as needed, but only fields explicitly handled by the data source code will be recognized.)*
+- **expected_status**: Defines the expected response code. The default expected code is 200. If an unexpected response code is received, an error will be raised.
+- **fallback**: If the request fails after 4 attempts and a fallback is defined, the specified **http_status** and **body** will be returned.
 
 ---
 
 
@@ -11,11 +11,12 @@ import (
 	"github.com/mindersec/minder/internal/datasources/structured"
 	minderv1 "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
 	v1datasources "github.com/mindersec/minder/pkg/datasources/v1"
+	provinfv1 "github.com/mindersec/minder/pkg/providers/v1"
 )
 
 // BuildFromProtobuf is a factory function that builds a new data source based on the given
 // data source type.
-func BuildFromProtobuf(ds *minderv1.DataSource) (v1datasources.DataSource, error) {
+func BuildFromProtobuf(ds *minderv1.DataSource, provider provinfv1.Provider) (v1datasources.DataSource, error) {
 	if ds == nil {
 		return nil, fmt.Errorf("data source is nil")
 	}
@@ -28,7 +29,7 @@ func BuildFromProtobuf(ds *minderv1.DataSource) (v1datasources.DataSource, error
 	case *minderv1.DataSource_Structured:
 		return structured.NewStructDataSource(ds.GetStructured())
 	case *minderv1.DataSource_Rest:
-		return rest.NewRestDataSource(ds.GetRest())
+		return rest.NewRestDataSource(ds.GetRest(), provider)
 	default:
 		return nil, fmt.Errorf("unknown data source type: %T", ds)
 	}
Original file line number	Diff line number	Diff line change
`@@ -190,7 +190,7 @@ func testCmdRun(cmd *cobra.Command, _ []string) error {`
`190`	`190`	`Alert: actionOptFromString(profile.Alert, models.ActionOptOff),`
`191`	`191`	`}`
`192`	`192`
`193`		`- dsRegistry, err := getDataSources(dataSourcefiles)`
	`193`	`+ dsRegistry, err := getDataSources(dataSourcefiles, prov)`
`194`	`194`	`if err != nil {`
`195`	`195`	`return fmt.Errorf("error getting data sources: %w", err)`
`196`	`196`	`}`
`@@ -502,7 +502,7 @@ func actionOptFromString(s *string, defAction models.ActionOpt) models.ActionOpt`
`502`	`502`	`return models.ActionOptUnknown`
`503`	`503`	`}`
`504`	`504`
`505`		`-func getDataSources(readers []*os.File) (*v1datasources.DataSourceRegistry, error) {`
	`505`	`+func getDataSources(readers []*os.File, provider provifv1.Provider) (*v1datasources.DataSourceRegistry, error) {`
`506`	`506`	`reg := v1datasources.NewDataSourceRegistry()`
`507`	`507`	`for _, r := range readers {`
`508`	`508`	`fname := r.Name()`
`@@ -515,7 +515,7 @@ func getDataSources(readers []*os.File) (*v1datasources.DataSourceRegistry, erro`
`515`	`515`	`return nil, fmt.Errorf("error validating data source %s: %w", fname, err)`
`516`	`516`	`}`
`517`	`517`
`518`		`- intds, err := internalds.BuildFromProtobuf(ds)`
	`518`	`+ intds, err := internalds.BuildFromProtobuf(ds, provider)`
`519`	`519`	`if err != nil {`
`520`	`520`	`return nil, fmt.Errorf("error building data source %s: %w", fname, err)`
`521`	`521`	`}`