coder/codersdk/templates.go

265 lines
8.8 KiB
Go
Raw Normal View History

package codersdk
import (
"context"
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/google/uuid"
"golang.org/x/xerrors"
)
// Template is the JSON representation of a Coder template. This type matches the
2022-04-01 19:42:36 +00:00
// database object for now, but is abstracted for ease of change later on.
type Template struct {
feat: Build framework for generating API docs (#5383) * WIP * Gen * WIP * chi swagger * WIP * WIP * WIP * GetWorkspaces * GetWorkspaces * Markdown * Use widdershins * WIP * WIP * WIP * Markdown template * Fix: makefile * fmt * Fix: comment * Enable swagger conditionally * fix: site * Default false * Flag tests * fix * fix * template fixes * Fix * Fix * Fix * WIP * Formatted * Cleanup * Templates * BEGIN END SECTION * subshell exit code * Fix * Fix merge * WIP * Fix * Fix fmt * Fix * Generic api.md page * Fix merge * Link pages * Fix * Fix * Fix: links * Add icon * Write manifest file * Fix fmt * Fix: enterprise * Fix: Swagger.Enable * Fix: rename apidocs to apidoc * Fix: find -not -prune * Fix: json not available * Fix: rename Coderd API to Coder API * Fix: npm exec * Fix: api dir * Fix: by ID * Fix: string uuid * Fix: include deleted * Fix: indirect go.mod * Fix: source lib.sh * Fix: shellcheck * Fix: pushd popd * Fix: fmt * Fix: improve workspaces * Fix: swagger-enable * Fix * Fix: mention only HTTP 200 * Fix: IDs * Fix: https * Fix: icon * More APis * Fix: format swagger.json * Fix: SwaggerEndpoint * Fix: SCRIPT_DIR * Fix: PROJECT_ROOT * Fix: use code tags in schemas.md * Fix: examples * Fix: examples * Fix: improve format * Fix: date-time,enums * Fix: include_deleted * Fix: array of * Fix: parameter, response * Fix: string time or null * Workspaces: more docs * Workspaces: more docs * Fix: renderDisplayName * Fix: ActiveUserCount * Fix * Fix: typo * Templates: docs * Notice: incomplete
2022-12-19 17:43:46 +00:00
ID uuid.UUID `json:"id" format:"uuid"`
CreatedAt time.Time `json:"created_at" format:"date-time"`
UpdatedAt time.Time `json:"updated_at" format:"date-time"`
OrganizationID uuid.UUID `json:"organization_id" format:"uuid"`
Name string `json:"name"`
DisplayName string `json:"display_name"`
Provisioner ProvisionerType `json:"provisioner" enums:"terraform"`
ActiveVersionID uuid.UUID `json:"active_version_id"`
WorkspaceOwnerCount uint32 `json:"workspace_owner_count"`
// ActiveUserCount is set to -1 when loading.
ActiveUserCount int `json:"active_user_count"`
BuildTimeStats TemplateBuildTimeStats `json:"build_time_stats"`
Description string `json:"description"`
Icon string `json:"icon"`
DefaultTTLMillis int64 `json:"default_ttl_ms"`
feat: Build framework for generating API docs (#5383) * WIP * Gen * WIP * chi swagger * WIP * WIP * WIP * GetWorkspaces * GetWorkspaces * Markdown * Use widdershins * WIP * WIP * WIP * Markdown template * Fix: makefile * fmt * Fix: comment * Enable swagger conditionally * fix: site * Default false * Flag tests * fix * fix * template fixes * Fix * Fix * Fix * WIP * Formatted * Cleanup * Templates * BEGIN END SECTION * subshell exit code * Fix * Fix merge * WIP * Fix * Fix fmt * Fix * Generic api.md page * Fix merge * Link pages * Fix * Fix * Fix: links * Add icon * Write manifest file * Fix fmt * Fix: enterprise * Fix: Swagger.Enable * Fix: rename apidocs to apidoc * Fix: find -not -prune * Fix: json not available * Fix: rename Coderd API to Coder API * Fix: npm exec * Fix: api dir * Fix: by ID * Fix: string uuid * Fix: include deleted * Fix: indirect go.mod * Fix: source lib.sh * Fix: shellcheck * Fix: pushd popd * Fix: fmt * Fix: improve workspaces * Fix: swagger-enable * Fix * Fix: mention only HTTP 200 * Fix: IDs * Fix: https * Fix: icon * More APis * Fix: format swagger.json * Fix: SwaggerEndpoint * Fix: SCRIPT_DIR * Fix: PROJECT_ROOT * Fix: use code tags in schemas.md * Fix: examples * Fix: examples * Fix: improve format * Fix: date-time,enums * Fix: include_deleted * Fix: array of * Fix: parameter, response * Fix: string time or null * Workspaces: more docs * Workspaces: more docs * Fix: renderDisplayName * Fix: ActiveUserCount * Fix * Fix: typo * Templates: docs * Notice: incomplete
2022-12-19 17:43:46 +00:00
CreatedByID uuid.UUID `json:"created_by_id" format:"uuid"`
CreatedByName string `json:"created_by_name"`
AllowUserCancelWorkspaceJobs bool `json:"allow_user_cancel_workspace_jobs"`
}
type TransitionStats struct {
feat: Build framework for generating API docs (#5383) * WIP * Gen * WIP * chi swagger * WIP * WIP * WIP * GetWorkspaces * GetWorkspaces * Markdown * Use widdershins * WIP * WIP * WIP * Markdown template * Fix: makefile * fmt * Fix: comment * Enable swagger conditionally * fix: site * Default false * Flag tests * fix * fix * template fixes * Fix * Fix * Fix * WIP * Formatted * Cleanup * Templates * BEGIN END SECTION * subshell exit code * Fix * Fix merge * WIP * Fix * Fix fmt * Fix * Generic api.md page * Fix merge * Link pages * Fix * Fix * Fix: links * Add icon * Write manifest file * Fix fmt * Fix: enterprise * Fix: Swagger.Enable * Fix: rename apidocs to apidoc * Fix: find -not -prune * Fix: json not available * Fix: rename Coderd API to Coder API * Fix: npm exec * Fix: api dir * Fix: by ID * Fix: string uuid * Fix: include deleted * Fix: indirect go.mod * Fix: source lib.sh * Fix: shellcheck * Fix: pushd popd * Fix: fmt * Fix: improve workspaces * Fix: swagger-enable * Fix * Fix: mention only HTTP 200 * Fix: IDs * Fix: https * Fix: icon * More APis * Fix: format swagger.json * Fix: SwaggerEndpoint * Fix: SCRIPT_DIR * Fix: PROJECT_ROOT * Fix: use code tags in schemas.md * Fix: examples * Fix: examples * Fix: improve format * Fix: date-time,enums * Fix: include_deleted * Fix: array of * Fix: parameter, response * Fix: string time or null * Workspaces: more docs * Workspaces: more docs * Fix: renderDisplayName * Fix: ActiveUserCount * Fix * Fix: typo * Templates: docs * Notice: incomplete
2022-12-19 17:43:46 +00:00
P50 *int64 `example:"123"`
P95 *int64 `example:"146"`
}
type TemplateBuildTimeStats map[WorkspaceTransition]TransitionStats
type UpdateActiveTemplateVersion struct {
ID uuid.UUID `json:"id" validate:"required"`
}
2022-10-10 20:37:06 +00:00
type TemplateRole string
const (
TemplateRoleAdmin TemplateRole = "admin"
2022-10-12 19:33:21 +00:00
TemplateRoleUse TemplateRole = "use"
2022-10-10 20:37:06 +00:00
TemplateRoleDeleted TemplateRole = ""
)
type TemplateACL struct {
Users []TemplateUser `json:"users"`
Groups []TemplateGroup `json:"group"`
}
type TemplateGroup struct {
Group
Role TemplateRole `json:"role"`
}
type TemplateUser struct {
User
Role TemplateRole `json:"role"`
}
type UpdateTemplateACL struct {
UserPerms map[string]TemplateRole `json:"user_perms,omitempty"`
GroupPerms map[string]TemplateRole `json:"group_perms,omitempty"`
}
type UpdateTemplateMeta struct {
Name string `json:"name,omitempty" validate:"omitempty,template_name"`
DisplayName string `json:"display_name,omitempty" validate:"omitempty,template_display_name"`
Description string `json:"description,omitempty"`
Icon string `json:"icon,omitempty"`
DefaultTTLMillis int64 `json:"default_ttl_ms,omitempty"`
AllowUserCancelWorkspaceJobs bool `json:"allow_user_cancel_workspace_jobs,omitempty"`
}
type TemplateExample struct {
ID string `json:"id"`
URL string `json:"url"`
Name string `json:"name"`
Description string `json:"description"`
Icon string `json:"icon"`
Tags []string `json:"tags"`
Markdown string `json:"markdown"`
}
// Template returns a single template.
func (c *Client) Template(ctx context.Context, template uuid.UUID) (Template, error) {
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/templates/%s", template), nil)
if err != nil {
return Template{}, nil
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return Template{}, readBodyAsError(res)
}
var resp Template
return resp, json.NewDecoder(res.Body).Decode(&resp)
}
func (c *Client) DeleteTemplate(ctx context.Context, template uuid.UUID) error {
res, err := c.Request(ctx, http.MethodDelete, fmt.Sprintf("/api/v2/templates/%s", template), nil)
if err != nil {
return err
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return readBodyAsError(res)
}
return nil
}
func (c *Client) UpdateTemplateMeta(ctx context.Context, templateID uuid.UUID, req UpdateTemplateMeta) (Template, error) {
res, err := c.Request(ctx, http.MethodPatch, fmt.Sprintf("/api/v2/templates/%s", templateID), req)
if err != nil {
return Template{}, err
}
defer res.Body.Close()
if res.StatusCode == http.StatusNotModified {
return Template{}, xerrors.New("template metadata not modified")
}
if res.StatusCode != http.StatusOK {
return Template{}, readBodyAsError(res)
}
var updated Template
return updated, json.NewDecoder(res.Body).Decode(&updated)
}
2022-10-10 20:37:06 +00:00
func (c *Client) UpdateTemplateACL(ctx context.Context, templateID uuid.UUID, req UpdateTemplateACL) error {
res, err := c.Request(ctx, http.MethodPatch, fmt.Sprintf("/api/v2/templates/%s/acl", templateID), req)
if err != nil {
return err
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return readBodyAsError(res)
}
return nil
}
func (c *Client) TemplateACL(ctx context.Context, templateID uuid.UUID) (TemplateACL, error) {
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/templates/%s/acl", templateID), nil)
if err != nil {
return TemplateACL{}, err
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return TemplateACL{}, readBodyAsError(res)
}
var acl TemplateACL
return acl, json.NewDecoder(res.Body).Decode(&acl)
}
// UpdateActiveTemplateVersion updates the active template version to the ID provided.
// The template version must be attached to the template.
func (c *Client) UpdateActiveTemplateVersion(ctx context.Context, template uuid.UUID, req UpdateActiveTemplateVersion) error {
res, err := c.Request(ctx, http.MethodPatch, fmt.Sprintf("/api/v2/templates/%s/versions", template), req)
if err != nil {
return nil
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return readBodyAsError(res)
}
return nil
}
// TemplateVersionsByTemplateRequest defines the request parameters for
// TemplateVersionsByTemplate.
type TemplateVersionsByTemplateRequest struct {
TemplateID uuid.UUID `json:"template_id" validate:"required"`
Pagination
}
// TemplateVersionsByTemplate lists versions associated with a template.
func (c *Client) TemplateVersionsByTemplate(ctx context.Context, req TemplateVersionsByTemplateRequest) ([]TemplateVersion, error) {
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/templates/%s/versions", req.TemplateID), nil, req.Pagination.asRequestOption())
if err != nil {
return nil, err
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, readBodyAsError(res)
}
var templateVersion []TemplateVersion
return templateVersion, json.NewDecoder(res.Body).Decode(&templateVersion)
feat: Add provisionerdaemon to coderd (#141) * feat: Add history middleware parameters These will be used for streaming logs, checking status, and other operations related to workspace and project history. * refactor: Move all HTTP routes to top-level struct Nesting all structs behind their respective structures is leaky, and promotes naming conflicts between handlers. Our HTTP routes cannot have conflicts, so neither should function naming. * Add provisioner daemon routes * Add periodic updates * Skip pubsub if short * Return jobs with WorkspaceHistory * Add endpoints for extracting singular history * The full end-to-end operation works * fix: Disable compression for websocket dRPC transport (#145) There is a race condition in the interop between the websocket and `dRPC`: https://github.com/coder/coder/runs/5038545709?check_suite_focus=true#step:7:117 - it seems both the websocket and dRPC feel like they own the `byte[]` being sent between them. This can lead to data races, in which both `dRPC` and the websocket are writing. This is just tracking some experimentation to fix that race condition ## Run results: ## - Run 1: peer test failure - Run 2: peer test failure - Run 3: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040858460?check_suite_focus=true#step:8:45 ``` status code 412: The provided project history is running. Wait for it to complete importing!` ``` - Run 4: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040957999?check_suite_focus=true#step:7:176 ``` workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` - Run 5: peer failure - Run 6: Pass ✅ - Run 7: Peer failure ## Open Questions: ## ### Is `dRPC` or `websocket` at fault for the data race? It looks like this condition is specifically happening when `dRPC` decides to [`SendError`]). This constructs a new byte payload from [`MarshalError`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/error.go#L15) - so `dRPC` has created this buffer and owns it. From `dRPC`'s perspective, the callstack looks like this: - [`sendPacket`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcstream/stream.go#L253) - [`writeFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L65) - [`AppendFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/packet.go#L128) - with finally the data race happening here: ```go // AppendFrame appends a marshaled form of the frame to the provided buffer. func AppendFrame(buf []byte, fr Frame) []byte { ... out := buf out = append(out, control). // <--------- ``` This should be fine, since `dPRC` create this buffer, and is taking the byte buffer constructed from `MarshalError` and tacking a bunch of headers on it to create a proper frame. Once `dRPC` is done writing, it _hangs onto the buffer and resets it here__: https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L73 However... the websocket implementation, once it gets the buffer, it runs a `statelessDeflate` [here](https://github.com/nhooyr/websocket/blob/8dee580a7f74cf1713400307b4eee514b927870f/write.go#L180), which compresses the buffer on the fly. This functionality actually [mutates the buffer in place](https://github.com/klauspost/compress/blob/a1a9cfc821f00faf2f5231beaa96244344d50391/flate/stateless.go#L94), which is where get our race. In the case where the `byte[]` aren't being manipulated anywhere else, this compress-in-place operation would be safe, and that's probably the case for most over-the-wire usages. In this case, though, where we're plumbing `dRPC` -> websocket, they both are manipulating it (`dRPC` is reusing the buffer for the next `write`, and `websocket` is compressing on the fly). ### Why does cloning on `Read` fail? Get a bunch of errors like: ``` 2022/02/02 19:26:10 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 ``` # UPDATE: We decided we could disable websocket compression, which would avoid the race because the in-place `deflate` operaton would no longer be run. Trying that out now: - Run 1: ✅ - Run 2: https://github.com/coder/coder/runs/5042645522?check_suite_focus=true#step:8:338 - Run 3: ✅ - Run 4: https://github.com/coder/coder/runs/5042988758?check_suite_focus=true#step:7:168 - Run 5: ✅ * fix: Remove race condition with acquiredJobDone channel (#148) Found another data race while running the tests: https://github.com/coder/coder/runs/5044320845?check_suite_focus=true#step:7:83 __Issue:__ There is a race in the p.acquiredJobDone chan - in particular, there can be a case where we're waiting on the channel to finish (in close) with <-p.acquiredJobDone, but in parallel, an acquireJob could've been started, which would create a new channel for p.acquiredJobDone. There is a similar race in `close(..)`ing the channel, which also came up in test runs. __Fix:__ Instead of recreating the channel everytime, we can use `sync.WaitGroup` to accomplish the same functionality - a semaphore to make close wait for the current job to wrap up. * fix: Bump up workspace history timeout (#149) This is an attempted fix for failures like: https://github.com/coder/coder/runs/5043435263?check_suite_focus=true#step:7:32 Looking at the timing of the test: ``` t.go:56: 2022-02-02 21:33:21.964 [DEBUG] (terraform-provisioner) <provision.go:139> ran apply t.go:56: 2022-02-02 21:33:21.991 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.050 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.090 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.140 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.195 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.240 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` It appears that the `terraform apply` job had just finished - with less than a second to spare until our `require.Eventually` completes - but there's still work to be done (ie, collecting the state files). So my suspicion is that terraform might, in some cases, exceed our 5s timeout. Note that in the setup for this test - there is a similar project history wait that waits for 15s, so I borrowed that here. In the future - we can look at potentially using a simple echo provider to exercise this in the unit test, in a way that is more reliable in terms of timing. I'll log an issue to track that. Co-authored-by: Bryan <bryan@coder.com>
2022-02-03 20:34:50 +00:00
}
// TemplateVersionByName returns a template version by it's friendly name.
// This is used for path-based routing. Like: /templates/example/versions/helloworld
func (c *Client) TemplateVersionByName(ctx context.Context, template uuid.UUID, name string) (TemplateVersion, error) {
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/templates/%s/versions/%s", template, name), nil)
feat: Add provisionerdaemon to coderd (#141) * feat: Add history middleware parameters These will be used for streaming logs, checking status, and other operations related to workspace and project history. * refactor: Move all HTTP routes to top-level struct Nesting all structs behind their respective structures is leaky, and promotes naming conflicts between handlers. Our HTTP routes cannot have conflicts, so neither should function naming. * Add provisioner daemon routes * Add periodic updates * Skip pubsub if short * Return jobs with WorkspaceHistory * Add endpoints for extracting singular history * The full end-to-end operation works * fix: Disable compression for websocket dRPC transport (#145) There is a race condition in the interop between the websocket and `dRPC`: https://github.com/coder/coder/runs/5038545709?check_suite_focus=true#step:7:117 - it seems both the websocket and dRPC feel like they own the `byte[]` being sent between them. This can lead to data races, in which both `dRPC` and the websocket are writing. This is just tracking some experimentation to fix that race condition ## Run results: ## - Run 1: peer test failure - Run 2: peer test failure - Run 3: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040858460?check_suite_focus=true#step:8:45 ``` status code 412: The provided project history is running. Wait for it to complete importing!` ``` - Run 4: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040957999?check_suite_focus=true#step:7:176 ``` workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` - Run 5: peer failure - Run 6: Pass ✅ - Run 7: Peer failure ## Open Questions: ## ### Is `dRPC` or `websocket` at fault for the data race? It looks like this condition is specifically happening when `dRPC` decides to [`SendError`]). This constructs a new byte payload from [`MarshalError`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/error.go#L15) - so `dRPC` has created this buffer and owns it. From `dRPC`'s perspective, the callstack looks like this: - [`sendPacket`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcstream/stream.go#L253) - [`writeFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L65) - [`AppendFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/packet.go#L128) - with finally the data race happening here: ```go // AppendFrame appends a marshaled form of the frame to the provided buffer. func AppendFrame(buf []byte, fr Frame) []byte { ... out := buf out = append(out, control). // <--------- ``` This should be fine, since `dPRC` create this buffer, and is taking the byte buffer constructed from `MarshalError` and tacking a bunch of headers on it to create a proper frame. Once `dRPC` is done writing, it _hangs onto the buffer and resets it here__: https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L73 However... the websocket implementation, once it gets the buffer, it runs a `statelessDeflate` [here](https://github.com/nhooyr/websocket/blob/8dee580a7f74cf1713400307b4eee514b927870f/write.go#L180), which compresses the buffer on the fly. This functionality actually [mutates the buffer in place](https://github.com/klauspost/compress/blob/a1a9cfc821f00faf2f5231beaa96244344d50391/flate/stateless.go#L94), which is where get our race. In the case where the `byte[]` aren't being manipulated anywhere else, this compress-in-place operation would be safe, and that's probably the case for most over-the-wire usages. In this case, though, where we're plumbing `dRPC` -> websocket, they both are manipulating it (`dRPC` is reusing the buffer for the next `write`, and `websocket` is compressing on the fly). ### Why does cloning on `Read` fail? Get a bunch of errors like: ``` 2022/02/02 19:26:10 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 ``` # UPDATE: We decided we could disable websocket compression, which would avoid the race because the in-place `deflate` operaton would no longer be run. Trying that out now: - Run 1: ✅ - Run 2: https://github.com/coder/coder/runs/5042645522?check_suite_focus=true#step:8:338 - Run 3: ✅ - Run 4: https://github.com/coder/coder/runs/5042988758?check_suite_focus=true#step:7:168 - Run 5: ✅ * fix: Remove race condition with acquiredJobDone channel (#148) Found another data race while running the tests: https://github.com/coder/coder/runs/5044320845?check_suite_focus=true#step:7:83 __Issue:__ There is a race in the p.acquiredJobDone chan - in particular, there can be a case where we're waiting on the channel to finish (in close) with <-p.acquiredJobDone, but in parallel, an acquireJob could've been started, which would create a new channel for p.acquiredJobDone. There is a similar race in `close(..)`ing the channel, which also came up in test runs. __Fix:__ Instead of recreating the channel everytime, we can use `sync.WaitGroup` to accomplish the same functionality - a semaphore to make close wait for the current job to wrap up. * fix: Bump up workspace history timeout (#149) This is an attempted fix for failures like: https://github.com/coder/coder/runs/5043435263?check_suite_focus=true#step:7:32 Looking at the timing of the test: ``` t.go:56: 2022-02-02 21:33:21.964 [DEBUG] (terraform-provisioner) <provision.go:139> ran apply t.go:56: 2022-02-02 21:33:21.991 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.050 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.090 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.140 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.195 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.240 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` It appears that the `terraform apply` job had just finished - with less than a second to spare until our `require.Eventually` completes - but there's still work to be done (ie, collecting the state files). So my suspicion is that terraform might, in some cases, exceed our 5s timeout. Note that in the setup for this test - there is a similar project history wait that waits for 15s, so I borrowed that here. In the future - we can look at potentially using a simple echo provider to exercise this in the unit test, in a way that is more reliable in terms of timing. I'll log an issue to track that. Co-authored-by: Bryan <bryan@coder.com>
2022-02-03 20:34:50 +00:00
if err != nil {
return TemplateVersion{}, err
feat: Add provisionerdaemon to coderd (#141) * feat: Add history middleware parameters These will be used for streaming logs, checking status, and other operations related to workspace and project history. * refactor: Move all HTTP routes to top-level struct Nesting all structs behind their respective structures is leaky, and promotes naming conflicts between handlers. Our HTTP routes cannot have conflicts, so neither should function naming. * Add provisioner daemon routes * Add periodic updates * Skip pubsub if short * Return jobs with WorkspaceHistory * Add endpoints for extracting singular history * The full end-to-end operation works * fix: Disable compression for websocket dRPC transport (#145) There is a race condition in the interop between the websocket and `dRPC`: https://github.com/coder/coder/runs/5038545709?check_suite_focus=true#step:7:117 - it seems both the websocket and dRPC feel like they own the `byte[]` being sent between them. This can lead to data races, in which both `dRPC` and the websocket are writing. This is just tracking some experimentation to fix that race condition ## Run results: ## - Run 1: peer test failure - Run 2: peer test failure - Run 3: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040858460?check_suite_focus=true#step:8:45 ``` status code 412: The provided project history is running. Wait for it to complete importing!` ``` - Run 4: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040957999?check_suite_focus=true#step:7:176 ``` workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` - Run 5: peer failure - Run 6: Pass ✅ - Run 7: Peer failure ## Open Questions: ## ### Is `dRPC` or `websocket` at fault for the data race? It looks like this condition is specifically happening when `dRPC` decides to [`SendError`]). This constructs a new byte payload from [`MarshalError`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/error.go#L15) - so `dRPC` has created this buffer and owns it. From `dRPC`'s perspective, the callstack looks like this: - [`sendPacket`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcstream/stream.go#L253) - [`writeFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L65) - [`AppendFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/packet.go#L128) - with finally the data race happening here: ```go // AppendFrame appends a marshaled form of the frame to the provided buffer. func AppendFrame(buf []byte, fr Frame) []byte { ... out := buf out = append(out, control). // <--------- ``` This should be fine, since `dPRC` create this buffer, and is taking the byte buffer constructed from `MarshalError` and tacking a bunch of headers on it to create a proper frame. Once `dRPC` is done writing, it _hangs onto the buffer and resets it here__: https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L73 However... the websocket implementation, once it gets the buffer, it runs a `statelessDeflate` [here](https://github.com/nhooyr/websocket/blob/8dee580a7f74cf1713400307b4eee514b927870f/write.go#L180), which compresses the buffer on the fly. This functionality actually [mutates the buffer in place](https://github.com/klauspost/compress/blob/a1a9cfc821f00faf2f5231beaa96244344d50391/flate/stateless.go#L94), which is where get our race. In the case where the `byte[]` aren't being manipulated anywhere else, this compress-in-place operation would be safe, and that's probably the case for most over-the-wire usages. In this case, though, where we're plumbing `dRPC` -> websocket, they both are manipulating it (`dRPC` is reusing the buffer for the next `write`, and `websocket` is compressing on the fly). ### Why does cloning on `Read` fail? Get a bunch of errors like: ``` 2022/02/02 19:26:10 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 ``` # UPDATE: We decided we could disable websocket compression, which would avoid the race because the in-place `deflate` operaton would no longer be run. Trying that out now: - Run 1: ✅ - Run 2: https://github.com/coder/coder/runs/5042645522?check_suite_focus=true#step:8:338 - Run 3: ✅ - Run 4: https://github.com/coder/coder/runs/5042988758?check_suite_focus=true#step:7:168 - Run 5: ✅ * fix: Remove race condition with acquiredJobDone channel (#148) Found another data race while running the tests: https://github.com/coder/coder/runs/5044320845?check_suite_focus=true#step:7:83 __Issue:__ There is a race in the p.acquiredJobDone chan - in particular, there can be a case where we're waiting on the channel to finish (in close) with <-p.acquiredJobDone, but in parallel, an acquireJob could've been started, which would create a new channel for p.acquiredJobDone. There is a similar race in `close(..)`ing the channel, which also came up in test runs. __Fix:__ Instead of recreating the channel everytime, we can use `sync.WaitGroup` to accomplish the same functionality - a semaphore to make close wait for the current job to wrap up. * fix: Bump up workspace history timeout (#149) This is an attempted fix for failures like: https://github.com/coder/coder/runs/5043435263?check_suite_focus=true#step:7:32 Looking at the timing of the test: ``` t.go:56: 2022-02-02 21:33:21.964 [DEBUG] (terraform-provisioner) <provision.go:139> ran apply t.go:56: 2022-02-02 21:33:21.991 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.050 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.090 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.140 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.195 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.240 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` It appears that the `terraform apply` job had just finished - with less than a second to spare until our `require.Eventually` completes - but there's still work to be done (ie, collecting the state files). So my suspicion is that terraform might, in some cases, exceed our 5s timeout. Note that in the setup for this test - there is a similar project history wait that waits for 15s, so I borrowed that here. In the future - we can look at potentially using a simple echo provider to exercise this in the unit test, in a way that is more reliable in terms of timing. I'll log an issue to track that. Co-authored-by: Bryan <bryan@coder.com>
2022-02-03 20:34:50 +00:00
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return TemplateVersion{}, readBodyAsError(res)
feat: Add provisionerdaemon to coderd (#141) * feat: Add history middleware parameters These will be used for streaming logs, checking status, and other operations related to workspace and project history. * refactor: Move all HTTP routes to top-level struct Nesting all structs behind their respective structures is leaky, and promotes naming conflicts between handlers. Our HTTP routes cannot have conflicts, so neither should function naming. * Add provisioner daemon routes * Add periodic updates * Skip pubsub if short * Return jobs with WorkspaceHistory * Add endpoints for extracting singular history * The full end-to-end operation works * fix: Disable compression for websocket dRPC transport (#145) There is a race condition in the interop between the websocket and `dRPC`: https://github.com/coder/coder/runs/5038545709?check_suite_focus=true#step:7:117 - it seems both the websocket and dRPC feel like they own the `byte[]` being sent between them. This can lead to data races, in which both `dRPC` and the websocket are writing. This is just tracking some experimentation to fix that race condition ## Run results: ## - Run 1: peer test failure - Run 2: peer test failure - Run 3: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040858460?check_suite_focus=true#step:8:45 ``` status code 412: The provided project history is running. Wait for it to complete importing!` ``` - Run 4: `TestWorkspaceHistory/CreateHistory` - https://github.com/coder/coder/runs/5040957999?check_suite_focus=true#step:7:176 ``` workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` - Run 5: peer failure - Run 6: Pass ✅ - Run 7: Peer failure ## Open Questions: ## ### Is `dRPC` or `websocket` at fault for the data race? It looks like this condition is specifically happening when `dRPC` decides to [`SendError`]). This constructs a new byte payload from [`MarshalError`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/error.go#L15) - so `dRPC` has created this buffer and owns it. From `dRPC`'s perspective, the callstack looks like this: - [`sendPacket`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcstream/stream.go#L253) - [`writeFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L65) - [`AppendFrame`](https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/packet.go#L128) - with finally the data race happening here: ```go // AppendFrame appends a marshaled form of the frame to the provided buffer. func AppendFrame(buf []byte, fr Frame) []byte { ... out := buf out = append(out, control). // <--------- ``` This should be fine, since `dPRC` create this buffer, and is taking the byte buffer constructed from `MarshalError` and tacking a bunch of headers on it to create a proper frame. Once `dRPC` is done writing, it _hangs onto the buffer and resets it here__: https://github.com/storj/drpc/blob/f6e369438f636b47ee788095d3fc13062ffbd019/drpcwire/writer.go#L73 However... the websocket implementation, once it gets the buffer, it runs a `statelessDeflate` [here](https://github.com/nhooyr/websocket/blob/8dee580a7f74cf1713400307b4eee514b927870f/write.go#L180), which compresses the buffer on the fly. This functionality actually [mutates the buffer in place](https://github.com/klauspost/compress/blob/a1a9cfc821f00faf2f5231beaa96244344d50391/flate/stateless.go#L94), which is where get our race. In the case where the `byte[]` aren't being manipulated anywhere else, this compress-in-place operation would be safe, and that's probably the case for most over-the-wire usages. In this case, though, where we're plumbing `dRPC` -> websocket, they both are manipulating it (`dRPC` is reusing the buffer for the next `write`, and `websocket` is compressing on the fly). ### Why does cloning on `Read` fail? Get a bunch of errors like: ``` 2022/02/02 19:26:10 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [ERR] yamux: Failed to read header: unexpected EOF 2022/02/02 19:26:25 [WARN] yamux: frame for missing stream: Vsn:0 Type:0 Flags:0 StreamID:0 Length:0 ``` # UPDATE: We decided we could disable websocket compression, which would avoid the race because the in-place `deflate` operaton would no longer be run. Trying that out now: - Run 1: ✅ - Run 2: https://github.com/coder/coder/runs/5042645522?check_suite_focus=true#step:8:338 - Run 3: ✅ - Run 4: https://github.com/coder/coder/runs/5042988758?check_suite_focus=true#step:7:168 - Run 5: ✅ * fix: Remove race condition with acquiredJobDone channel (#148) Found another data race while running the tests: https://github.com/coder/coder/runs/5044320845?check_suite_focus=true#step:7:83 __Issue:__ There is a race in the p.acquiredJobDone chan - in particular, there can be a case where we're waiting on the channel to finish (in close) with <-p.acquiredJobDone, but in parallel, an acquireJob could've been started, which would create a new channel for p.acquiredJobDone. There is a similar race in `close(..)`ing the channel, which also came up in test runs. __Fix:__ Instead of recreating the channel everytime, we can use `sync.WaitGroup` to accomplish the same functionality - a semaphore to make close wait for the current job to wrap up. * fix: Bump up workspace history timeout (#149) This is an attempted fix for failures like: https://github.com/coder/coder/runs/5043435263?check_suite_focus=true#step:7:32 Looking at the timing of the test: ``` t.go:56: 2022-02-02 21:33:21.964 [DEBUG] (terraform-provisioner) <provision.go:139> ran apply t.go:56: 2022-02-02 21:33:21.991 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.050 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.090 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.140 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.195 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running t.go:56: 2022-02-02 21:33:22.240 [DEBUG] (provisionerd) <provisionerd.go:162> skipping acquire; job is already running workspacehistory_test.go:122: Error Trace: workspacehistory_test.go:122 Error: Condition never satisfied Test: TestWorkspaceHistory/CreateHistory ``` It appears that the `terraform apply` job had just finished - with less than a second to spare until our `require.Eventually` completes - but there's still work to be done (ie, collecting the state files). So my suspicion is that terraform might, in some cases, exceed our 5s timeout. Note that in the setup for this test - there is a similar project history wait that waits for 15s, so I borrowed that here. In the future - we can look at potentially using a simple echo provider to exercise this in the unit test, in a way that is more reliable in terms of timing. I'll log an issue to track that. Co-authored-by: Bryan <bryan@coder.com>
2022-02-03 20:34:50 +00:00
}
var templateVersion TemplateVersion
return templateVersion, json.NewDecoder(res.Body).Decode(&templateVersion)
}
type DAUEntry struct {
Date time.Time `json:"date"`
Amount int `json:"amount"`
}
type TemplateDAUsResponse struct {
Entries []DAUEntry `json:"entries"`
}
func (c *Client) TemplateDAUs(ctx context.Context, templateID uuid.UUID) (*TemplateDAUsResponse, error) {
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/templates/%s/daus", templateID), nil)
if err != nil {
return nil, xerrors.Errorf("execute request: %w", err)
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, readBodyAsError(res)
}
var resp TemplateDAUsResponse
return &resp, json.NewDecoder(res.Body).Decode(&resp)
}
// AgentStatsReportRequest is a WebSocket request by coderd
// to the agent for stats.
// @typescript-ignore AgentStatsReportRequest
type AgentStatsReportRequest struct {
}
// AgentStatsReportResponse is returned for each report
// request by the agent.
type AgentStatsReportResponse struct {
NumConns int64 `json:"num_comms"`
// RxBytes is the number of received bytes.
RxBytes int64 `json:"rx_bytes"`
// TxBytes is the number of transmitted bytes.
TxBytes int64 `json:"tx_bytes"`
}
// TemplateExamples lists example templates embedded in coder.
func (c *Client) TemplateExamples(ctx context.Context, organizationID uuid.UUID) ([]TemplateExample, error) {
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/organizations/%s/templates/examples", organizationID), nil)
if err != nil {
return nil, err
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, readBodyAsError(res)
}
var templateExamples []TemplateExample
return templateExamples, json.NewDecoder(res.Body).Decode(&templateExamples)
}