diff --git a/cmd/cmd.go b/cmd/cmd.go
index 2301efdfe..92e62ca7f 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -182,6 +182,10 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			mfConfig.System = cmd.Args
 		case "license":
 			mfConfig.License = cmd.Args
+		case "parser":
+			mfConfig.Parser = cmd.Args
+		case "renderer":
+			mfConfig.Renderer = cmd.Args
 		}
 	}
 
diff --git a/model/parsers/parsers.go b/model/parsers/parsers.go
index 2b471d1da..fa9f8b598 100644
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -45,6 +45,10 @@ func ParserForName(name string) Parser {
 	var p Parser
 	switch name {
+	case "qwen3":
+		p = &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
+	case "qwen3-thinking":
+		p = &Qwen3Parser{hasThinkingSupport: true, defaultThinking: true}
 	case "qwen3-coder":
 		p = &Qwen3CoderParser{}
 	case "qwen3-vl-instruct":
diff --git a/model/parsers/parsers_test.go b/model/parsers/parsers_test.go
index 4f8566de3..15c2f664f 100644
--- a/model/parsers/parsers_test.go
+++ b/model/parsers/parsers_test.go
@@ -54,6 +54,8 @@ func TestBuiltInParsersStillWork(t *testing.T) {
 		name string
 	}{
 		{"passthrough"},
+		{"qwen3"},
+		{"qwen3-thinking"},
 		{"qwen3-coder"},
 		{"harmony"},
 	}
diff --git a/model/parsers/qwen3.go b/model/parsers/qwen3.go
new file mode 100644
index 000000000..e49111fb5
--- /dev/null
+++ b/model/parsers/qwen3.go
@@ -0,0 +1,335 @@
+package parsers
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"strings"
+	"unicode"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/logutil"
+)
+
+type qwen3ParserState int
+
+const (
+	qwen3ParserStateLookingForThinkingOpen qwen3ParserState = iota
+	qwen3ParserStateThinkingStartedEatingWhitespace
+	qwen3ParserStateCollectingThinking
+	qwen3ParserStateThinkingDoneEatingWhitespace
+	qwen3ParserStateCollectingContent
+	qwen3ParserStateToolStartedEatingWhitespace
+	qwen3ParserStateCollectingToolContent
+)
+
+const (
+	// NOTE(review): these tag literals were empty ("") in the garbled source;
+	// restored to the standard Qwen3 special tokens — confirm against the
+	// model's chat template.
+	qwen3ThinkingOpenTag  = "<think>"
+	qwen3ThinkingCloseTag = "</think>"
+	qwen3ToolOpenTag  = "<tool_call>"
+	qwen3ToolCloseTag = "</tool_call>"
+)
+
+// Qwen3Parser parses Qwen3 output to extract thinking and tool calls.
+// Qwen3 prompts end with <think> when thinking is enabled, so output begins
+// with thinking content directly (without an opening tag).
+type Qwen3Parser struct {
+	state                  qwen3ParserState
+	buffer                 strings.Builder
+	tools                  []api.Tool
+	hasThinkingSupport     bool
+	defaultThinking        bool
+	maybeThinkingOpenAtBOL bool
+}
+
+func (p *Qwen3Parser) HasToolSupport() bool {
+	return true
+}
+
+func (p *Qwen3Parser) HasThinkingSupport() bool {
+	return p.hasThinkingSupport
+}
+
+func (p *Qwen3Parser) Init(tools []api.Tool, lastMessage *api.Message, thinkValue *api.ThinkValue) []api.Tool {
+	p.tools = tools
+	p.buffer.Reset()
+
+	thinkingEnabled := thinkValue != nil && thinkValue.Bool()
+	if thinkValue == nil {
+		thinkingEnabled = p.defaultThinking
+	}
+
+	if p.hasThinkingSupport && thinkingEnabled {
+		p.state = qwen3ParserStateCollectingThinking
+		p.maybeThinkingOpenAtBOL = true
+	} else {
+		p.state = qwen3ParserStateCollectingContent
+		p.maybeThinkingOpenAtBOL = false
+	}
+	return tools
+}
+
+type qwen3Event interface {
+	isQwen3Event()
+}
+
+type qwen3EventContent struct {
+	content string
+}
+
+func (qwen3EventContent) isQwen3Event() {}
+
+type qwen3EventRawToolCall struct {
+	raw string
+}
+
+func (qwen3EventRawToolCall) isQwen3Event() {}
+
+type qwen3EventThinkingContent struct {
+	content string
+}
+
+func (qwen3EventThinkingContent) isQwen3Event() {}
+
+func (p *Qwen3Parser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	p.buffer.WriteString(s)
+	events := p.parseEvents()
+
+	var contentSb strings.Builder
+	var thinkingSb strings.Builder
+	for _, event := range events {
+		switch event := event.(type) {
+		case qwen3EventRawToolCall:
+			toolCall, err := parseQwen3ToolCall(event, p.tools)
+			if err != nil {
+				slog.Warn("qwen3 tool call parsing failed", "error", err)
+				return "", "", nil, err
+			}
+			calls =
append(calls, toolCall) + case qwen3EventThinkingContent: + thinkingSb.WriteString(event.content) + case qwen3EventContent: + contentSb.WriteString(event.content) + } + } + + return contentSb.String(), thinkingSb.String(), calls, nil +} + +func (p *Qwen3Parser) parseEvents() []qwen3Event { + var all []qwen3Event + + keepLooping := true + for keepLooping { + var events []qwen3Event + events, keepLooping = p.eat() + if len(events) > 0 { + all = append(all, events...) + } + } + + if len(all) > 0 { + slog.Log(context.TODO(), logutil.LevelTrace, "qwen3 events parsed", "events", all, "state", p.state, "buffer", p.buffer.String()) + } + + return all +} + +func (p *Qwen3Parser) eatLeadingWhitespaceAndTransitionTo(nextState qwen3ParserState) ([]qwen3Event, bool) { + trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace) + p.buffer.Reset() + if trimmed == "" { + return nil, false + } + p.state = nextState + p.buffer.WriteString(trimmed) + return nil, true +} + +func (p *Qwen3Parser) splitAtTag(tag string, trimAfter bool) (string, string) { + return splitAtTag(&p.buffer, tag, trimAfter) +} + +func (p *Qwen3Parser) eat() ([]qwen3Event, bool) { + var events []qwen3Event + + switch p.state { + case qwen3ParserStateLookingForThinkingOpen: + trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace) + if strings.HasPrefix(trimmed, qwen3ThinkingOpenTag) { + after := strings.TrimPrefix(trimmed, qwen3ThinkingOpenTag) + after = strings.TrimLeftFunc(after, unicode.IsSpace) + p.buffer.Reset() + p.buffer.WriteString(after) + if after == "" { + p.state = qwen3ParserStateThinkingStartedEatingWhitespace + } else { + p.state = qwen3ParserStateCollectingThinking + } + return events, true + } else if strings.HasPrefix(qwen3ThinkingOpenTag, trimmed) { + return events, false + } else if trimmed == "" { + return events, false + } + p.state = qwen3ParserStateCollectingContent + return events, true + + case qwen3ParserStateThinkingStartedEatingWhitespace: + return 
p.eatLeadingWhitespaceAndTransitionTo(qwen3ParserStateCollectingThinking) + + case qwen3ParserStateCollectingThinking: + acc := p.buffer.String() + + // Some qwen3 checkpoints emit an explicit opening tag even + // though the prompt already ended with . Strip exactly one + // leading opening tag if present. + if p.maybeThinkingOpenAtBOL { + trimmed := strings.TrimLeftFunc(acc, unicode.IsSpace) + if strings.HasPrefix(trimmed, qwen3ThinkingOpenTag) { + after := strings.TrimPrefix(trimmed, qwen3ThinkingOpenTag) + after = strings.TrimLeftFunc(after, unicode.IsSpace) + p.buffer.Reset() + p.buffer.WriteString(after) + if after == "" { + return events, false + } + p.maybeThinkingOpenAtBOL = false + return events, true + } + if strings.HasPrefix(qwen3ThinkingOpenTag, trimmed) { + return events, false + } + p.maybeThinkingOpenAtBOL = false + } + + if strings.Contains(acc, qwen3ThinkingCloseTag) { + thinking, remaining := p.splitAtTag(qwen3ThinkingCloseTag, true) + if len(thinking) > 0 { + events = append(events, qwen3EventThinkingContent{content: thinking}) + } + if remaining == "" { + p.state = qwen3ParserStateThinkingDoneEatingWhitespace + } else { + p.state = qwen3ParserStateCollectingContent + } + return events, true + } else if overlapLen := overlap(acc, qwen3ThinkingCloseTag); overlapLen > 0 { + beforePartialTag := acc[:len(acc)-overlapLen] + trailingWsLen := trailingWhitespaceLen(beforePartialTag) + ambiguousStart := len(beforePartialTag) - trailingWsLen + + unambiguous := acc[:ambiguousStart] + ambiguous := acc[ambiguousStart:] + p.buffer.Reset() + p.buffer.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = append(events, qwen3EventThinkingContent{content: unambiguous}) + } + return events, false + } + + whitespaceLen := trailingWhitespaceLen(acc) + ambiguousStart := len(acc) - whitespaceLen + unambiguous := acc[:ambiguousStart] + ambiguous := acc[ambiguousStart:] + p.buffer.Reset() + p.buffer.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = 
append(events, qwen3EventThinkingContent{content: unambiguous}) + } + return events, false + + case qwen3ParserStateThinkingDoneEatingWhitespace: + return p.eatLeadingWhitespaceAndTransitionTo(qwen3ParserStateCollectingContent) + + case qwen3ParserStateCollectingContent: + acc := p.buffer.String() + if strings.Contains(acc, qwen3ToolOpenTag) { + before, after := p.splitAtTag(qwen3ToolOpenTag, true) + if len(before) > 0 { + events = append(events, qwen3EventContent{content: before}) + } + if after == "" { + p.state = qwen3ParserStateToolStartedEatingWhitespace + } else { + p.state = qwen3ParserStateCollectingToolContent + } + return events, true + } else if overlapLen := overlap(acc, qwen3ToolOpenTag); overlapLen > 0 { + beforePartialTag := acc[:len(acc)-overlapLen] + trailingWsLen := trailingWhitespaceLen(beforePartialTag) + ambiguousStart := len(beforePartialTag) - trailingWsLen + + unambiguous := acc[:ambiguousStart] + ambiguous := acc[ambiguousStart:] + p.buffer.Reset() + p.buffer.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = append(events, qwen3EventContent{content: unambiguous}) + } + return events, false + } + + whitespaceLen := trailingWhitespaceLen(acc) + ambiguousStart := len(acc) - whitespaceLen + unambiguous := acc[:ambiguousStart] + ambiguous := acc[ambiguousStart:] + p.buffer.Reset() + p.buffer.WriteString(ambiguous) + if len(unambiguous) > 0 { + events = append(events, qwen3EventContent{content: unambiguous}) + } + return events, false + + case qwen3ParserStateToolStartedEatingWhitespace: + return p.eatLeadingWhitespaceAndTransitionTo(qwen3ParserStateCollectingToolContent) + + case qwen3ParserStateCollectingToolContent: + acc := p.buffer.String() + if strings.Contains(acc, qwen3ToolCloseTag) { + toolContent, _ := p.splitAtTag(qwen3ToolCloseTag, true) + if len(toolContent) == 0 { + slog.Warn("qwen3 tool call closing tag found but no content before it") + } + events = append(events, qwen3EventRawToolCall{raw: toolContent}) + p.state = 
qwen3ParserStateCollectingContent
+			return events, true
+		}
+		return events, false
+
+	default:
+		panic("unreachable")
+	}
+}
+
+func parseQwen3ToolCall(raw qwen3EventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
+	var parsed struct {
+		Name      string         `json:"name"`
+		Arguments map[string]any `json:"arguments"`
+	}
+
+	if err := json.Unmarshal([]byte(raw.raw), &parsed); err != nil {
+		return api.ToolCall{}, fmt.Errorf("failed to parse JSON: %w", err)
+	}
+
+	if parsed.Name == "" {
+		return api.ToolCall{}, fmt.Errorf("empty function name")
+	}
+
+	_ = tools // qwen3 uses direct JSON args and does not require schema coercion here.
+
+	toolCall := api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name:      parsed.Name,
+			Arguments: api.NewToolCallFunctionArguments(),
+		},
+	}
+
+	for key, value := range parsed.Arguments {
+		toolCall.Function.Arguments.Set(key, value)
+	}
+
+	return toolCall, nil
+}
diff --git a/model/parsers/qwen3_test.go b/model/parsers/qwen3_test.go
new file mode 100644
index 000000000..853874ded
--- /dev/null
+++ b/model/parsers/qwen3_test.go
@@ -0,0 +1,147 @@
+package parsers
+
+import (
+	"testing"
+
+	"github.com/ollama/ollama/api"
+)
+
+func TestQwen3ParserThinkingEnabled(t *testing.T) {
+	parser := &Qwen3Parser{hasThinkingSupport: true, defaultThinking: true}
+	parser.Init(nil, nil, &api.ThinkValue{Value: true})
+
+	content, thinking, calls, err := parser.Add("Let me think...</think>Answer.", true)
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+
+	if thinking != "Let me think..." {
+		t.Fatalf("expected thinking %q, got %q", "Let me think...", thinking)
+	}
+	if content != "Answer." {
+		t.Fatalf("expected content %q, got %q", "Answer.", content)
+	}
+	if len(calls) != 0 {
+		t.Fatalf("expected no tool calls, got %d", len(calls))
+	}
+}
+
+func TestQwen3ParserThinkingEnabledWithExplicitOpeningTag(t *testing.T) {
+	parser := &Qwen3Parser{hasThinkingSupport: true, defaultThinking: true}
+	parser.Init(nil, nil, &api.ThinkValue{Value: true})
+
+	content, thinking, calls, err := parser.Add("<think>\nLet me think...</think>Answer.", true)
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+
+	if thinking != "Let me think..." {
+		t.Fatalf("expected thinking %q, got %q", "Let me think...", thinking)
+	}
+	if content != "Answer." {
+		t.Fatalf("expected content %q, got %q", "Answer.", content)
+	}
+	if len(calls) != 0 {
+		t.Fatalf("expected no tool calls, got %d", len(calls))
+	}
+}
+
+func TestQwen3ParserThinkingEnabledWithSplitOpeningTag(t *testing.T) {
+	parser := &Qwen3Parser{hasThinkingSupport: true, defaultThinking: true}
+	parser.Init(nil, nil, &api.ThinkValue{Value: true})
+
+	// NOTE(review): the first streamed chunk was lost in extraction; reconstructed
+	// so the optional opening tag arrives split across two Add calls — verify
+	// against the original patch.
+	if _, _, _, err := parser.Add("<thi", false); err != nil {
+		t.Fatalf("parse failed on first chunk: %v", err)
+	}
+	content, thinking, calls, err := parser.Add("nk>Let me think...</think>Answer.", true)
+	if err != nil {
+		t.Fatalf("parse failed on second chunk: %v", err)
+	}
+	if thinking != "Let me think..." {
+		t.Fatalf("expected thinking %q, got %q", "Let me think...", thinking)
+	}
+	if content != "Answer." {
+		t.Fatalf("expected content %q, got %q", "Answer.", content)
+	}
+	if len(calls) != 0 {
+		t.Fatalf("expected no tool calls, got %d", len(calls))
+	}
+}
+
+func TestQwen3ParserThinkingDisabled(t *testing.T) {
+	parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
+	parser.Init(nil, nil, &api.ThinkValue{Value: false})
+
+	content, thinking, calls, err := parser.Add("Direct answer", true)
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+
+	if thinking != "" {
+		t.Fatalf("expected no thinking, got %q", thinking)
+	}
+	if content != "Direct answer" {
+		t.Fatalf("expected content %q, got %q", "Direct answer", content)
+	}
+	if len(calls) != 0 {
+		t.Fatalf("expected no tool calls, got %d", len(calls))
+	}
+}
+
+func TestQwen3ParserNilThinkDefaultsToContentForInstructParser(t *testing.T) {
+	parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
+	parser.Init(nil, nil, nil)
+
+	content, thinking, calls, err := parser.Add("Direct answer", true)
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+
+	if thinking != "" {
+		t.Fatalf("expected no thinking, got %q", thinking)
+	}
+	if content != "Direct answer" {
+		t.Fatalf("expected content %q, got %q", "Direct answer", content)
+	}
+	if len(calls) != 0 {
+		t.Fatalf("expected no tool calls, got %d", len(calls))
+	}
+}
+
+func TestQwen3ParserToolCall(t *testing.T) {
+	parser := &Qwen3Parser{hasThinkingSupport: false, defaultThinking: false}
+	parser.Init(nil, nil, &api.ThinkValue{Value: false})
+
+	input := "<tool_call>{\"name\":\"get_weather\",\"arguments\":{\"location\":\"San Francisco\",\"unit\":\"celsius\"}}</tool_call>"
+	content, thinking, calls, err := parser.Add(input, true)
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+
+	if content != "" {
+		t.Fatalf("expected empty content, got %q", content)
+	}
+	if thinking != "" {
+		t.Fatalf("expected empty thinking, got %q", thinking)
+	}
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(calls))
+	}
+	if 
calls[0].Function.Name != "get_weather" { + t.Fatalf("expected tool name %q, got %q", "get_weather", calls[0].Function.Name) + } + + location, ok := calls[0].Function.Arguments.Get("location") + if !ok || location != "San Francisco" { + t.Fatalf("expected location %q, got %v", "San Francisco", location) + } + unit, ok := calls[0].Function.Arguments.Get("unit") + if !ok || unit != "celsius" { + t.Fatalf("expected unit %q, got %v", "celsius", unit) + } +} diff --git a/x/create/client/create.go b/x/create/client/create.go index f89f9fc98..b8062f3d4 100644 --- a/x/create/client/create.go +++ b/x/create/client/create.go @@ -30,6 +30,8 @@ type ModelfileConfig struct { Template string System string License string + Parser string + Renderer string } // CreateOptions holds all options for model creation. @@ -37,7 +39,7 @@ type CreateOptions struct { ModelName string ModelDir string Quantize string // "int4", "int8", "nvfp4", or "mxfp8" for quantization - Modelfile *ModelfileConfig // template/system/license from Modelfile + Modelfile *ModelfileConfig // template/system/license/parser/renderer from Modelfile } // CreateModel imports a model from a local directory. 
@@ -267,8 +269,8 @@ func newManifestWriter(opts CreateOptions, capabilities []string, parserName, re ModelFormat: "safetensors", Capabilities: caps, Requires: MinOllamaVersion, - Parser: parserName, - Renderer: rendererName, + Parser: resolveParserName(opts.Modelfile, parserName), + Renderer: resolveRendererName(opts.Modelfile, rendererName), } configJSON, err := json.Marshal(configData) if err != nil { @@ -305,6 +307,22 @@ func newManifestWriter(opts CreateOptions, capabilities []string, parserName, re } } +func resolveParserName(mf *ModelfileConfig, inferred string) string { + if mf != nil && mf.Parser != "" { + return mf.Parser + } + + return inferred +} + +func resolveRendererName(mf *ModelfileConfig, inferred string) string { + if mf != nil && mf.Renderer != "" { + return mf.Renderer + } + + return inferred +} + // createModelfileLayers creates layers for template, system, and license from Modelfile config. func createModelfileLayers(mf *ModelfileConfig) ([]manifest.Layer, error) { var layers []manifest.Layer @@ -410,7 +428,7 @@ func getParserName(modelDir string) string { return "deepseek3" } if strings.Contains(archLower, "qwen3") { - return "qwen3-coder" + return "qwen3" } } @@ -424,7 +442,7 @@ func getParserName(modelDir string) string { return "deepseek3" } if strings.Contains(typeLower, "qwen3") { - return "qwen3-coder" + return "qwen3" } } diff --git a/x/create/client/create_test.go b/x/create/client/create_test.go index b41807279..1e7062237 100644 --- a/x/create/client/create_test.go +++ b/x/create/client/create_test.go @@ -10,6 +10,8 @@ func TestModelfileConfig(t *testing.T) { Template: "{{ .Prompt }}", System: "You are a helpful assistant.", License: "MIT", + Parser: "qwen3", + Renderer: "qwen3", } if config.Template != "{{ .Prompt }}" { @@ -21,6 +23,12 @@ func TestModelfileConfig(t *testing.T) { if config.License != "MIT" { t.Errorf("License = %q, want %q", config.License, "MIT") } + if config.Parser != "qwen3" { + t.Errorf("Parser = %q, want %q", 
config.Parser, "qwen3") + } + if config.Renderer != "qwen3" { + t.Errorf("Renderer = %q, want %q", config.Renderer, "qwen3") + } } func TestModelfileConfig_Empty(t *testing.T) { @@ -35,6 +43,12 @@ func TestModelfileConfig_Empty(t *testing.T) { if config.License != "" { t.Errorf("License should be empty, got %q", config.License) } + if config.Parser != "" { + t.Errorf("Parser should be empty, got %q", config.Parser) + } + if config.Renderer != "" { + t.Errorf("Renderer should be empty, got %q", config.Renderer) + } } func TestModelfileConfig_PartialFields(t *testing.T) { @@ -53,6 +67,12 @@ func TestModelfileConfig_PartialFields(t *testing.T) { if config.License != "" { t.Error("License should be empty") } + if config.Parser != "" { + t.Error("Parser should be empty") + } + if config.Renderer != "" { + t.Error("Renderer should be empty") + } } func TestMinOllamaVersion(t *testing.T) { @@ -98,6 +118,8 @@ func TestCreateOptions(t *testing.T) { Template: "test", System: "system", License: "MIT", + Parser: "qwen3-thinking", + Renderer: "qwen3", }, } @@ -116,6 +138,92 @@ func TestCreateOptions(t *testing.T) { if opts.Modelfile.Template != "test" { t.Errorf("Modelfile.Template = %q, want %q", opts.Modelfile.Template, "test") } + if opts.Modelfile.Parser != "qwen3-thinking" { + t.Errorf("Modelfile.Parser = %q, want %q", opts.Modelfile.Parser, "qwen3-thinking") + } + if opts.Modelfile.Renderer != "qwen3" { + t.Errorf("Modelfile.Renderer = %q, want %q", opts.Modelfile.Renderer, "qwen3") + } +} + +func TestResolveParserName(t *testing.T) { + tests := []struct { + name string + mf *ModelfileConfig + inferred string + want string + }{ + { + name: "nil modelfile uses inferred", + mf: nil, + inferred: "qwen3", + want: "qwen3", + }, + { + name: "empty parser uses inferred", + mf: &ModelfileConfig{ + Parser: "", + }, + inferred: "qwen3", + want: "qwen3", + }, + { + name: "explicit parser overrides inferred", + mf: &ModelfileConfig{ + Parser: "qwen3-thinking", + }, + inferred: 
"qwen3", + want: "qwen3-thinking", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := resolveParserName(tt.mf, tt.inferred); got != tt.want { + t.Fatalf("resolveParserName() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestResolveRendererName(t *testing.T) { + tests := []struct { + name string + mf *ModelfileConfig + inferred string + want string + }{ + { + name: "nil modelfile uses inferred", + mf: nil, + inferred: "qwen3-coder", + want: "qwen3-coder", + }, + { + name: "empty renderer uses inferred", + mf: &ModelfileConfig{ + Renderer: "", + }, + inferred: "qwen3-coder", + want: "qwen3-coder", + }, + { + name: "explicit renderer overrides inferred", + mf: &ModelfileConfig{ + Renderer: "qwen3", + }, + inferred: "qwen3-coder", + want: "qwen3", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := resolveRendererName(tt.mf, tt.inferred); got != tt.want { + t.Fatalf("resolveRendererName() = %q, want %q", got, tt.want) + } + }) + } } func TestCreateOptions_Defaults(t *testing.T) { diff --git a/x/mlxrunner/imports.go b/x/mlxrunner/imports.go index a8111b056..5e0cfe86d 100644 --- a/x/mlxrunner/imports.go +++ b/x/mlxrunner/imports.go @@ -6,4 +6,5 @@ import ( _ "github.com/ollama/ollama/x/models/gemma3" _ "github.com/ollama/ollama/x/models/glm4_moe_lite" _ "github.com/ollama/ollama/x/models/llama" + _ "github.com/ollama/ollama/x/models/qwen3" ) diff --git a/x/mlxrunner/pipeline.go b/x/mlxrunner/pipeline.go index cd4d78620..0da5862c8 100644 --- a/x/mlxrunner/pipeline.go +++ b/x/mlxrunner/pipeline.go @@ -18,7 +18,15 @@ func (r *Runner) TextGenerationPipeline(request Request) error { return errors.New("model not loaded") } - mlx.EnableCompile() + enableCompile := true + if modelCompile, ok := r.Model.(interface{ EnableCompile() bool }); ok { + enableCompile = modelCompile.EnableCompile() + } + if enableCompile { + mlx.EnableCompile() + } else { + mlx.DisableCompile() + } inputs := 
r.Tokenizer.Encode(request.Prompt, true) diff --git a/x/models/qwen3/qwen3.go b/x/models/qwen3/qwen3.go new file mode 100644 index 000000000..7a49cf37a --- /dev/null +++ b/x/models/qwen3/qwen3.go @@ -0,0 +1,338 @@ +//go:build mlx + +// Package qwen3 provides the Qwen3 text model implementation for MLX. +package qwen3 + +import ( + "encoding/json" + "fmt" + "math" + + "github.com/ollama/ollama/x/imagegen/tokenizer" + "github.com/ollama/ollama/x/mlxrunner/cache" + "github.com/ollama/ollama/x/mlxrunner/mlx" + "github.com/ollama/ollama/x/mlxrunner/model" + "github.com/ollama/ollama/x/mlxrunner/model/base" + "github.com/ollama/ollama/x/models/nn" +) + +func init() { + base.Register("Qwen3ForCausalLM", newModel) +} + +// Config holds Qwen3 model configuration. +type Config struct { + HiddenSize int32 `json:"hidden_size"` + NumHiddenLayers int32 `json:"num_hidden_layers"` + IntermediateSize int32 `json:"intermediate_size"` + NumAttentionHeads int32 `json:"num_attention_heads"` + NumKeyValueHeads int32 `json:"num_key_value_heads"` + VocabSize int32 `json:"vocab_size"` + RMSNormEps float32 `json:"rms_norm_eps"` + RopeTheta float32 `json:"rope_theta"` + HeadDim int32 `json:"head_dim"` + MaxPositionEmbeddings int32 `json:"max_position_embeddings"` + TieWordEmbeddings bool `json:"tie_word_embeddings"` + + // Quantization parameters (set during load based on model quantization). + QuantGroupSize int `json:"-"` + QuantBits int `json:"-"` + QuantMode string `json:"-"` + TensorQuant map[string]*model.TensorQuantInfo `json:"-"` + + // Computed fields. + Scale float32 `json:"-"` + QKNormEps float32 `json:"-"` +} + +// Model is the Qwen3 text-only model. +type Model struct { + EmbedTokens *nn.Embedding + Layers []*Layer + Norm *nn.RMSNorm + LMHead nn.LinearLayer + + tok *tokenizer.Tokenizer + *Config + + weightPrefix string +} + +// Layer is a single Qwen3 decoder block. 
+type Layer struct { + Attention *Attention + MLP *MLP + AttentionNorm *nn.RMSNorm + MLPNorm *nn.RMSNorm +} + +// Attention implements Qwen3 attention with Q/K norms. +type Attention struct { + QProj nn.LinearLayer + KProj nn.LinearLayer + VProj nn.LinearLayer + OProj nn.LinearLayer + QNorm *nn.RMSNorm + KNorm *nn.RMSNorm +} + +// MLP is the feed-forward network with SwiGLU activation. +type MLP struct { + GateProj nn.LinearLayer + UpProj nn.LinearLayer + DownProj nn.LinearLayer +} + +func resolveWeightPrefix(tensors map[string]*mlx.Array) string { + for _, prefix := range []string{"", "language_model."} { + if tensors[prefix+"model.embed_tokens.weight"] != nil { + return prefix + } + } + return "" +} + +func newModel(root *model.Root) (base.Model, error) { + configData, err := root.Manifest.ReadConfig("config.json") + if err != nil { + return nil, fmt.Errorf("load config: %w", err) + } + + var cfg Config + if err := json.Unmarshal(configData, &cfg); err != nil { + return nil, fmt.Errorf("parse config: %w", err) + } + + if cfg.HiddenSize <= 0 { + return nil, fmt.Errorf("invalid hidden_size: %d", cfg.HiddenSize) + } + if cfg.NumAttentionHeads <= 0 { + return nil, fmt.Errorf("invalid num_attention_heads: %d", cfg.NumAttentionHeads) + } + if cfg.NumKeyValueHeads <= 0 { + cfg.NumKeyValueHeads = cfg.NumAttentionHeads + } + if cfg.HeadDim == 0 { + if cfg.HiddenSize%cfg.NumAttentionHeads != 0 { + return nil, fmt.Errorf("hidden_size (%d) must be divisible by num_attention_heads (%d)", cfg.HiddenSize, cfg.NumAttentionHeads) + } + cfg.HeadDim = cfg.HiddenSize / cfg.NumAttentionHeads + } + if cfg.HeadDim <= 0 { + return nil, fmt.Errorf("invalid head_dim: %d", cfg.HeadDim) + } + if cfg.NumAttentionHeads%cfg.NumKeyValueHeads != 0 { + return nil, fmt.Errorf("num_attention_heads (%d) must be divisible by num_key_value_heads (%d)", cfg.NumAttentionHeads, cfg.NumKeyValueHeads) + } + if cfg.RMSNormEps == 0 { + cfg.RMSNormEps = 1e-6 + } + if cfg.RopeTheta == 0 { + cfg.RopeTheta = 
1000000 + } + cfg.Scale = float32(1.0 / math.Sqrt(float64(cfg.HeadDim))) + cfg.QKNormEps = 1e-6 + + if qt := root.QuantType(); qt != "" { + cfg.QuantGroupSize, cfg.QuantBits, cfg.QuantMode = model.QuantizationParams(qt) + if gs := root.GroupSize(); gs > 0 { + cfg.QuantGroupSize = gs + } + } else { + cfg.QuantGroupSize, cfg.QuantBits, cfg.QuantMode = model.QuantizationParams("") + } + cfg.TensorQuant = root.AllTensorQuant() + + tokData, err := root.Manifest.ReadConfig("tokenizer.json") + if err != nil { + return nil, fmt.Errorf("load tokenizer config: %w", err) + } + + tokConfig := &tokenizer.TokenizerConfig{ + ConfigJSON: configData, + } + if genConfigData, err := root.Manifest.ReadConfig("generation_config.json"); err == nil { + tokConfig.GenerationConfigJSON = genConfigData + } + if tokConfigData, err := root.Manifest.ReadConfig("tokenizer_config.json"); err == nil { + tokConfig.TokenizerConfigJSON = tokConfigData + } + + tok, err := tokenizer.LoadFromBytesWithConfig(tokData, tokConfig) + if err != nil { + return nil, fmt.Errorf("parse tokenizer: %w", err) + } + + m := &Model{ + Layers: make([]*Layer, cfg.NumHiddenLayers), + Config: &cfg, + tok: tok, + } + + return m, nil +} + +// LoadWeights receives all tensors loaded from the manifest and assigns them +// to model fields. 
+func (m *Model) LoadWeights(tensors map[string]*mlx.Array) error { + m.weightPrefix = resolveWeightPrefix(tensors) + prefix := m.weightPrefix + linears := model.NewLinearFactory(tensors, m.QuantGroupSize, m.QuantBits, m.QuantMode, m.TensorQuant) + + embedWeight := tensors[prefix+"model.embed_tokens.weight"] + if embedWeight == nil { + return fmt.Errorf("missing embedding weight: %smodel.embed_tokens.weight", prefix) + } + m.EmbedTokens = nn.NewEmbedding(embedWeight) + + normWeight := tensors[prefix+"model.norm.weight"] + if normWeight == nil { + return fmt.Errorf("missing final norm weight: %smodel.norm.weight", prefix) + } + m.Norm = nn.NewRMSNorm(normWeight, m.RMSNormEps) + + if m.TieWordEmbeddings { + m.LMHead = nn.NewLinear(embedWeight, nil) + } else if lmHead := linears.Make(prefix + "lm_head"); lmHead != nil { + m.LMHead = lmHead + } else if lmHead := linears.Make("lm_head"); lmHead != nil { + m.LMHead = lmHead + } else { + // Qwen3 checkpoints commonly tie output projection to embeddings. 
+ m.LMHead = nn.NewLinear(embedWeight, nil) + } + + for i := int32(0); i < m.NumHiddenLayers; i++ { + layerPrefix := fmt.Sprintf("%smodel.layers.%d", prefix, i) + + layer := &Layer{ + Attention: &Attention{}, + MLP: &MLP{}, + } + + if w := tensors[layerPrefix+".input_layernorm.weight"]; w != nil { + layer.AttentionNorm = nn.NewRMSNorm(w, m.RMSNormEps) + } + if w := tensors[layerPrefix+".post_attention_layernorm.weight"]; w != nil { + layer.MLPNorm = nn.NewRMSNorm(w, m.RMSNormEps) + } + + layer.Attention.QProj = linears.Make(layerPrefix + ".self_attn.q_proj") + layer.Attention.KProj = linears.Make(layerPrefix + ".self_attn.k_proj") + layer.Attention.VProj = linears.Make(layerPrefix + ".self_attn.v_proj") + layer.Attention.OProj = linears.Make(layerPrefix + ".self_attn.o_proj") + + if w := tensors[layerPrefix+".self_attn.q_norm.weight"]; w != nil { + layer.Attention.QNorm = nn.NewRMSNorm(w, m.QKNormEps) + } + if w := tensors[layerPrefix+".self_attn.k_norm.weight"]; w != nil { + layer.Attention.KNorm = nn.NewRMSNorm(w, m.QKNormEps) + } + + layer.MLP.GateProj = linears.Make(layerPrefix + ".mlp.gate_proj") + layer.MLP.UpProj = linears.Make(layerPrefix + ".mlp.up_proj") + layer.MLP.DownProj = linears.Make(layerPrefix + ".mlp.down_proj") + + if layer.AttentionNorm == nil { + return fmt.Errorf("layer %d: missing input_layernorm", i) + } + if layer.MLPNorm == nil { + return fmt.Errorf("layer %d: missing post_attention_layernorm", i) + } + if layer.Attention.QProj == nil || layer.Attention.KProj == nil || layer.Attention.VProj == nil || layer.Attention.OProj == nil { + return fmt.Errorf("layer %d: missing attention projections", i) + } + if layer.Attention.QNorm == nil || layer.Attention.KNorm == nil { + return fmt.Errorf("layer %d: missing attention q/k norms", i) + } + if layer.MLP.GateProj == nil || layer.MLP.UpProj == nil || layer.MLP.DownProj == nil { + return fmt.Errorf("layer %d: missing mlp projections", i) + } + + m.Layers[i] = layer + } + + collected := 
mlx.Collect(m) + mlx.Eval(collected...) + + return nil +} + +func (m *Model) Forward(tokens *mlx.Array, caches []cache.Cache) *mlx.Array { + dims := tokens.Dims() + B, L := int32(dims[0]), int32(dims[1]) + + h := m.EmbedTokens.Forward(tokens) + for i, layer := range m.Layers { + var c cache.Cache + if caches != nil && i < len(caches) { + c = caches[i] + } + h = layer.Forward(h, c, B, L, m.Config) + } + + return m.Norm.Forward(h, m.RMSNormEps) +} + +func (m *Model) Unembed(x *mlx.Array) *mlx.Array { + return m.LMHead.Forward(x) +} + +func (m *Model) NumLayers() int { + return len(m.Layers) +} + +func (m *Model) Tokenizer() *tokenizer.Tokenizer { + return m.tok +} + +func (m *Model) NewCaches() []cache.Cache { + caches := make([]cache.Cache, len(m.Layers)) + for i := range caches { + caches[i] = cache.NewKVCache() + } + return caches +} + +func (l *Layer) Forward(x *mlx.Array, c cache.Cache, B, L int32, cfg *Config) *mlx.Array { + h := mlx.Add(x, l.Attention.Forward(l.AttentionNorm.Forward(x, cfg.RMSNormEps), c, B, L, cfg)) + return mlx.Add(h, l.MLP.Forward(l.MLPNorm.Forward(h, cfg.RMSNormEps))) +} + +func (a *Attention) Forward(x *mlx.Array, c cache.Cache, B, L int32, cfg *Config) *mlx.Array { + q := a.QProj.Forward(x) + k := a.KProj.Forward(x) + v := a.VProj.Forward(x) + + q = mlx.Reshape(q, B, L, cfg.NumAttentionHeads, cfg.HeadDim) + k = mlx.Reshape(k, B, L, cfg.NumKeyValueHeads, cfg.HeadDim) + v = mlx.Reshape(v, B, L, cfg.NumKeyValueHeads, cfg.HeadDim) + + q = a.QNorm.Forward(q, cfg.QKNormEps) + k = a.KNorm.Forward(k, cfg.QKNormEps) + + q = mlx.Transpose(q, 0, 2, 1, 3) + k = mlx.Transpose(k, 0, 2, 1, 3) + v = mlx.Transpose(v, 0, 2, 1, 3) + + offset := 0 + if c != nil { + offset = c.Offset() + } + q = mlx.RoPEWithBase(q, int(cfg.HeadDim), false, cfg.RopeTheta, 1.0, offset) + k = mlx.RoPEWithBase(k, int(cfg.HeadDim), false, cfg.RopeTheta, 1.0, offset) + + if c != nil { + k, v = c.Update(k, v) + } + + // MLX SDPA supports grouped-query attention directly (Q heads 
can be a + // multiple of K/V heads), so avoid materializing repeated K/V tensors. + out := mlx.ScaledDotProductAttentionCausal(q, k, v, cfg.Scale, L > 1) + out = mlx.Reshape(mlx.Transpose(out, 0, 2, 1, 3), B, L, cfg.NumAttentionHeads*cfg.HeadDim) + return a.OProj.Forward(out) +} + +func (m *MLP) Forward(x *mlx.Array) *mlx.Array { + return m.DownProj.Forward(mlx.Mul(mlx.SiLU(m.GateProj.Forward(x)), m.UpProj.Forward(x))) +}