mirror of
https://github.com/ollama/ollama.git
synced 2026-05-07 00:22:43 -05:00
Closed
opened 2026-04-12 12:21:11 -05:00 by GiteaMirror
·
9 comments
No Branch/Tag Specified
main
hoyyeva/anthropic-local-image-path
dhiltgen/ci
dhiltgen/llama-runner
parth-remove-claude-desktop-launch
hoyyeva/anthropic-reference-images-path
parth-anthropic-reference-images-path
brucemacd/download-before-remove
hoyyeva/editor-config-repair
parth-mlx-decode-checkpoints
parth-launch-codex-app
hoyyeva/fix-codex-model-metadata-warning
hoyyeva/qwen
parth/hide-claude-desktop-till-release
hoyyeva/opencode-image-modality
parth-add-claude-code-autoinstall
release_v0.22.0
pdevine/manifest-list
codex/fix-codex-model-metadata-warning
pdevine/addressable-manifest
brucemacd/launch-fetch-reccomended
jmorganca/llama-compat
launch-copilot-cli
hoyyeva/opencode-thinking
release_v0.20.7
parth-auto-save-backup
parth-test
jmorganca/gemma4-audio-replacements
fix-manifest-digest-on-pull
hoyyeva/vscode-improve
brucemacd/install-server-wait
parth/update-claude-docs
brucemac/start-ap-install
pdevine/mlx-update
pdevine/qwen35_vision
drifkin/api-show-fallback
mintlify/image-generation-1773352582
hoyyeva/server-context-length-local-config
jmorganca/faster-reptition-penalties
jmorganca/convert-nemotron
parth-pi-thinking
pdevine/sampling-penalties
jmorganca/fix-create-quantization-memory
dongchen/resumable_transfer_fix
pdevine/sampling-cache-error
jessegross/mlx-usage
hoyyeva/openclaw-config
hoyyeva/app-html
pdevine/qwen3next
brucemacd/sign-sh-install
brucemacd/tui-update
brucemacd/usage-api
jmorganca/launch-empty
fix-app-dist-embed
mxyng/mlx-compile
mxyng/mlx-quant
mxyng/mlx-glm4.7
mxyng/mlx
brucemacd/simplify-model-picker
jmorganca/qwen3-concurrent
fix-glm-4.7-flash-mla-config
drifkin/qwen3-coder-opening-tag
brucemacd/usage-cli
fix-cuda12-fattn-shmem
ollama-imagegen-docs
parth/fix-multiline-inputs
brucemacd/config-docs
mxyng/model-files
mxyng/simple-execute
fix-imagegen-ollama-models
mxyng/async-upload
jmorganca/lazy-no-dtype-changes
imagegen-auto-detect-create
parth/decrease-concurrent-download-hf
fix-mlx-quantize-init
jmorganca/x-cleanup
usage
imagegen-readme
jmorganca/glm-image
mlx-gpu-cd
jmorganca/imagegen-modelfile
parth/agent-skills
parth/agent-allowlist
parth/signed-in-offline
parth/agents
parth/fix-context-chopping
improve-cloud-flow
parth/add-models-websearch
parth/prompt-renderer-mcp
jmorganca/native-settings
jmorganca/download-stream-hash
jmorganca/client2-rebased
brucemacd/oai-chat-req-multipart
jessegross/multi_chunk_reserve
grace/additional-omit-empty
grace/mistral-3-large
mxyng/tokenizer2
mxyng/tokenizer
jessegross/flash
hoyyeva/windows-nacked-app
mxyng/cleanup-attention
grace/deepseek-parser
hoyyeva/remember-unsent-prompt
parth/add-lfs-pointer-error-conversion
parth/olmo2-test2
hoyyeva/ollama-launchagent-plist
nicole/olmo-model
parth/olmo-test
mxyng/remove-embedded
parth/render-template
jmorganca/intellect-3
parth/remove-prealloc-linter
jmorganca/cmd-eval
nicole/nomic-embed-text-fix
mxyng/lint-2
hoyyeva/add-gemini-3-pro-preview
hoyyeva/load-model-list
mxyng/expand-path
mxyng/environ-2
hoyyeva/deeplink-json-encoding
parth/improve-tool-calling-tests
hoyyeva/conversation
hoyyeva/assistant-edit-response
hoyyeva/thinking
origin/brucemacd/invalid-char-i-err
parth/improve-tool-calling
jmorganca/required-omitempty
grace/qwen3-vl-tests
mxyng/iter-client
parth/docs-readme
nicole/embed-test
pdevine/integration-benchstat
parth/remove-generate-cmd
parth/add-toolcall-id
mxyng/server-tests
jmorganca/glm-4.6
jmorganca/gin-h-compat
drifkin/stable-tool-args
pdevine/qwen3-more-thinking
parth/add-websearch-client
nicole/websearch_local
jmorganca/qwen3-coder-updates
grace/deepseek-v3-migration-tests
mxyng/fix-create
jmorganca/cloud-errors
pdevine/parser-tidy
revert-12233-parth/simplify-entrypoints-runner
parth/enable-so-gpt-oss
brucemacd/qwen3vl
jmorganca/readme-simplify
parth/gpt-oss-structured-outputs
revert-12039-jmorganca/tools-braces
mxyng/embeddings
mxyng/gguf
mxyng/benchmark
mxyng/types-null
parth/move-parsing
mxyng/gemma2
jmorganca/docs
mxyng/16-bit
mxyng/create-stdin
pdevine/authorizedkeys
mxyng/quant
parth/opt-in-error-context-window
brucemacd/cache-models
brucemacd/runner-completion
jmorganca/llama-update-6
brucemacd/benchmark-list
brucemacd/partial-read-caps
parth/deepseek-r1-tools
mxyng/omit-array
parth/tool-prefix-temp
brucemacd/runner-test
jmorganca/qwen25vl
brucemacd/model-forward-test-ext
parth/python-function-parsing
jmorganca/cuda-compression-none
drifkin/num-parallel
drifkin/chat-truncation-fix
jmorganca/sync
parth/python-tools-calling
drifkin/array-head-count
brucemacd/create-no-loop
parth/server-enable-content-stream-with-tools
qwen25omni
mxyng/v3
brucemacd/ropeconfig
jmorganca/silence-tokenizer
parth/sample-so-test
parth/sampling-structured-outputs
brucemacd/doc-go-engine
parth/constrained-sampling-json
jmorganca/mistral-wip
brucemacd/mistral-small-convert
parth/sample-unmarshal-json-for-params
brucemacd/jomorganca/mistral
pdevine/bfloat16
jmorganca/mistral
brucemacd/mistral
pdevine/logging
parth/sample-correctness-fix
parth/sample-fix-sorting
jmorgan/sample-fix-sorting-extras
jmorganca/temp-0-images
brucemacd/parallel-embed-models
brucemacd/shim-grammar
jmorganca/fix-gguf-error
bmizerany/nameswork
jmorganca/faster-releases
bmizerany/validatenames
brucemacd/err-no-vocab
brucemacd/rope-config
brucemacd/err-hint
brucemacd/qwen2_5
brucemacd/logprobs
brucemacd/new_runner_graph_bench
progress-flicker
brucemacd/forward-test
brucemacd/go_qwen2
pdevine/gemma2
jmorganca/add-missing-symlink-eval
mxyng/next-debug
parth/set-context-size-openai
brucemacd/next-bpe-bench
brucemacd/next-bpe-test
brucemacd/new_runner_e2e
brucemacd/new_runner_qwen2
pdevine/convert-cohere2
brucemacd/convert-cli
parth/log-probs
mxyng/next-mlx
mxyng/cmd-history
parth/templating
parth/tokenize-detokenize
brucemacd/check-key-register
bmizerany/grammar
jmorganca/vendor-081b29bd
mxyng/func-checks
jmorganca/fix-null-format
parth/fix-default-to-warn-json
jmorganca/qwen2vl
jmorganca/no-concat
parth/cmd-cleanup-SO
brucemacd/check-key-register-structured-err
parth/openai-stream-usage
parth/fix-referencing-so
stream-tools-stop
jmorganca/degin-1
brucemacd/install-path-clean
brucemacd/push-name-validation
brucemacd/browser-key-register
jmorganca/openai-fix-first-message
jmorganca/fix-proxy
jessegross/sample
parth/disallow-streaming-tools
dhiltgen/remove_submodule
jmorganca/ga
jmorganca/mllama
pdevine/newlines
pdevine/geems-2b
jmorganca/llama-bump
mxyng/modelname-7
mxyng/gin-slog
mxyng/modelname-6
jyan/convert-prog
jyan/quant5
paligemma-support
pdevine/import-docs
jmorganca/openai-context
jyan/paligemma
jyan/p2
jyan/palitest
bmizerany/embedspeedup
jmorganca/llama-vit
brucemacd/allow-ollama
royh/ep-methods
royh/whisper
mxyng/api-models
mxyng/fix-memory
jyan/q4_4/8
jyan/ollama-v
royh/stream-tools
roy-embed-parallel
bmizerany/hrm
revert-5963-revert-5924-mxyng/llama3.1-rope
royh/embed-viz
jyan/local2
jyan/auth
jyan/local
jyan/parse-temp
jmorganca/template-mistral
jyan/reord-g
royh-openai-suffixdocs
royh-imgembed
royh-embed-parallel
jyan/quant4
royh-precision
jyan/progress
pdevine/fix-template
jyan/quant3
pdevine/ggla
mxyng/update-registry-domain
jmorganca/ggml-static
mxyng/create-context
jyan/v0.146
mxyng/layers-from-files
build_dist
bmizerany/noseek
royh-ls
royh-name
timeout
mxyng/server-timestamp
bmizerany/nosillyggufslurps
royh-params
jmorganca/llama-cpp-7c26775
royh-openai-delete
royh-show-rigid
jmorganca/enable-fa
jmorganca/no-error-template
jyan/format
royh-testdelete
bmizerany/fastverify
language_support
pdevine/ps-glitches
brucemacd/tokenize
bruce/iq-quants
bmizerany/filepathwithcoloninhost
mxyng/split-bin
bmizerany/client-registry
jmorganca/if-none-match
native
jmorganca/native
jmorganca/batch-embeddings
jmorganca/initcmake
jmorganca/mm
pdevine/showggmlinfo
modenameenforcealphanum
bmizerany/modenameenforcealphanum
jmorganca/done-reason
jmorganca/llama-cpp-8960fe8
ollama.com
bmizerany/filepathnobuild
bmizerany/types/model/defaultfix
rmdisplaylong
nogogen
bmizerany/x
modelfile-readme
bmizerany/replacecolon
jmorganca/limit
jmorganca/execstack
jmorganca/replace-assets
mxyng/tune-concurrency
jmorganca/testing
whitespace-detection
jmorganca/options
upgrade-all
scratch
cuda-search
mattw/airenamer
mattw/allmodelsonhuggingface
mattw/quantcontext
mattw/whatneedstorun
brucemacd/llama-mem-calc
mattw/faq-context
mattw/communitylinks
mattw/noprune
mattw/python-functioncalling
rename
mxyng/install
pulse
remove-first
editor
mattw/selfqueryingretrieval
cgo
mattw/howtoquant
api
matt/streamingapi
format-config
mxyng/extra-args
shell
update-nous-hermes
cp-model
upload-progress
fix-unknown-model
fix-model-names
delete-fix
insecure-registry
ls
deletemodels
progressbar
readme-updates
license-layers
skip-list
list-models
modelpath
matt/examplemodelfiles
distribution
go-opts
v0.30.0-rc3
v0.30.0-rc2
v0.30.0-rc1
v0.30.0-rc0
v0.23.1
v0.23.1-rc0
v0.23.0
v0.23.0-rc0
v0.22.1
v0.22.1-rc1
v0.22.1-rc0
v0.22.0
v0.22.0-rc1
v0.21.3-rc0
v0.21.2-rc1
v0.21.2
v0.21.2-rc0
v0.21.1
v0.21.1-rc1
v0.21.1-rc0
v0.21.0
v0.21.0-rc1
v0.21.0-rc0
v0.20.8-rc0
v0.20.7
v0.20.7-rc1
v0.20.7-rc0
v0.20.6
v0.20.6-rc1
v0.20.6-rc0
v0.20.5
v0.20.5-rc2
v0.20.5-rc1
v0.20.5-rc0
v0.20.4
v0.20.4-rc2
v0.20.4-rc1
v0.20.4-rc0
v0.20.3
v0.20.3-rc0
v0.20.2
v0.20.1
v0.20.1-rc2
v0.20.1-rc1
v0.20.1-rc0
v0.20.0
v0.20.0-rc1
v0.20.0-rc0
v0.19.0
v0.19.0-rc2
v0.19.0-rc1
v0.19.0-rc0
v0.18.4-rc1
v0.18.4-rc0
v0.18.3
v0.18.3-rc2
v0.18.3-rc1
v0.18.3-rc0
v0.18.2
v0.18.2-rc1
v0.18.2-rc0
v0.18.1
v0.18.1-rc1
v0.18.1-rc0
v0.18.0
v0.18.0-rc2
v0.18.0-rc1
v0.18.0-rc0
v0.17.8-rc4
v0.17.8-rc3
v0.17.8-rc2
v0.17.8-rc1
v0.17.8-rc0
v0.17.7
v0.17.7-rc2
v0.17.7-rc1
v0.17.7-rc0
v0.17.6
v0.17.5
v0.17.4
v0.17.3
v0.17.2
v0.17.1
v0.17.1-rc2
v0.17.1-rc1
v0.17.1-rc0
v0.17.0
v0.17.0-rc2
v0.17.0-rc1
v0.17.0-rc0
v0.16.3
v0.16.3-rc2
v0.16.3-rc1
v0.16.3-rc0
v0.16.2
v0.16.2-rc0
v0.16.1
v0.16.0
v0.16.0-rc2
v0.16.0-rc0
v0.16.0-rc1
v0.15.6
v0.15.5
v0.15.5-rc5
v0.15.5-rc4
v0.15.5-rc3
v0.15.5-rc2
v0.15.5-rc1
v0.15.5-rc0
v0.15.4
v0.15.3
v0.15.2
v0.15.1
v0.15.1-rc1
v0.15.1-rc0
v0.15.0-rc6
v0.15.0
v0.15.0-rc5
v0.15.0-rc4
v0.15.0-rc3
v0.15.0-rc2
v0.15.0-rc1
v0.15.0-rc0
v0.14.3
v0.14.3-rc3
v0.14.3-rc2
v0.14.3-rc1
v0.14.3-rc0
v0.14.2
v0.14.2-rc1
v0.14.2-rc0
v0.14.1
v0.14.0-rc11
v0.14.0
v0.14.0-rc10
v0.14.0-rc9
v0.14.0-rc8
v0.14.0-rc7
v0.14.0-rc6
v0.14.0-rc5
v0.14.0-rc4
v0.14.0-rc3
v0.14.0-rc2
v0.14.0-rc1
v0.14.0-rc0
v0.13.5
v0.13.5-rc1
v0.13.5-rc0
v0.13.4-rc2
v0.13.4
v0.13.4-rc1
v0.13.4-rc0
v0.13.3
v0.13.3-rc1
v0.13.3-rc0
v0.13.2
v0.13.2-rc2
v0.13.2-rc1
v0.13.2-rc0
v0.13.1
v0.13.1-rc2
v0.13.1-rc1
v0.13.1-rc0
v0.13.0
v0.13.0-rc0
v0.12.11
v0.12.11-rc1
v0.12.11-rc0
v0.12.10
v0.12.10-rc1
v0.12.10-rc0
v0.12.9-rc0
v0.12.9
v0.12.8
v0.12.8-rc0
v0.12.7
v0.12.7-rc1
v0.12.7-rc0
v0.12.7-citest0
v0.12.6
v0.12.6-rc1
v0.12.6-rc0
v0.12.5
v0.12.5-rc0
v0.12.4
v0.12.4-rc7
v0.12.4-rc6
v0.12.4-rc5
v0.12.4-rc4
v0.12.4-rc3
v0.12.4-rc2
v0.12.4-rc1
v0.12.4-rc0
v0.12.3
v0.12.2
v0.12.2-rc0
v0.12.1
v0.12.1-rc1
v0.12.1-rc2
v0.12.1-rc0
v0.12.0
v0.12.0-rc1
v0.12.0-rc0
v0.11.11
v0.11.11-rc3
v0.11.11-rc2
v0.11.11-rc1
v0.11.11-rc0
v0.11.10
v0.11.9
v0.11.9-rc0
v0.11.8
v0.11.8-rc0
v0.11.7-rc1
v0.11.7-rc0
v0.11.7
v0.11.6
v0.11.6-rc0
v0.11.5-rc4
v0.11.5-rc3
v0.11.5
v0.11.5-rc5
v0.11.5-rc2
v0.11.5-rc1
v0.11.5-rc0
v0.11.4
v0.11.4-rc0
v0.11.3
v0.11.3-rc0
v0.11.2
v0.11.1
v0.11.0-rc0
v0.11.0-rc1
v0.11.0-rc2
v0.11.0
v0.10.2-int1
v0.10.1
v0.10.0
v0.10.0-rc4
v0.10.0-rc3
v0.10.0-rc2
v0.10.0-rc1
v0.10.0-rc0
v0.9.7-rc1
v0.9.7-rc0
v0.9.6
v0.9.6-rc0
v0.9.6-ci0
v0.9.5
v0.9.4-rc5
v0.9.4-rc6
v0.9.4
v0.9.4-rc3
v0.9.4-rc4
v0.9.4-rc1
v0.9.4-rc2
v0.9.4-rc0
v0.9.3
v0.9.3-rc5
v0.9.4-citest0
v0.9.3-rc4
v0.9.3-rc3
v0.9.3-rc2
v0.9.3-rc1
v0.9.3-rc0
v0.9.2
v0.9.1
v0.9.1-rc1
v0.9.1-rc0
v0.9.1-ci1
v0.9.1-ci0
v0.9.0
v0.9.0-rc0
v0.8.0
v0.8.0-rc0
v0.7.1-rc2
v0.7.1
v0.7.1-rc1
v0.7.1-rc0
v0.7.0
v0.7.0-rc1
v0.7.0-rc0
v0.6.9-rc0
v0.6.8
v0.6.8-rc0
v0.6.7
v0.6.7-rc2
v0.6.7-rc1
v0.6.7-rc0
v0.6.6
v0.6.6-rc2
v0.6.6-rc1
v0.6.6-rc0
v0.6.5-rc1
v0.6.5
v0.6.5-rc0
v0.6.4-rc0
v0.6.4
v0.6.3-rc1
v0.6.3
v0.6.3-rc0
v0.6.2
v0.6.2-rc0
v0.6.1
v0.6.1-rc0
v0.6.0-rc0
v0.6.0
v0.5.14-rc0
v0.5.13
v0.5.13-rc6
v0.5.13-rc5
v0.5.13-rc4
v0.5.13-rc3
v0.5.13-rc2
v0.5.13-rc1
v0.5.13-rc0
v0.5.12
v0.5.12-rc1
v0.5.12-rc0
v0.5.11
v0.5.10
v0.5.9
v0.5.9-rc0
v0.5.8-rc13
v0.5.8
v0.5.8-rc12
v0.5.8-rc11
v0.5.8-rc10
v0.5.8-rc9
v0.5.8-rc8
v0.5.8-rc7
v0.5.8-rc6
v0.5.8-rc5
v0.5.8-rc4
v0.5.8-rc3
v0.5.8-rc2
v0.5.8-rc1
v0.5.8-rc0
v0.5.7
v0.5.6
v0.5.5
v0.5.5-rc0
v0.5.4
v0.5.3
v0.5.3-rc0
v0.5.2
v0.5.2-rc3
v0.5.2-rc2
v0.5.2-rc1
v0.5.2-rc0
v0.5.1
v0.5.0
v0.5.0-rc1
v0.4.8-rc0
v0.4.7
v0.4.6
v0.4.5
v0.4.4
v0.4.3
v0.4.3-rc0
v0.4.2
v0.4.2-rc1
v0.4.2-rc0
v0.4.1
v0.4.1-rc0
v0.4.0
v0.4.0-rc8
v0.4.0-rc7
v0.4.0-rc6
v0.4.0-rc5
v0.4.0-rc4
v0.4.0-rc3
v0.4.0-rc2
v0.4.0-rc1
v0.4.0-rc0
v0.4.0-ci3
v0.3.14
v0.3.14-rc0
v0.3.13
v0.3.12
v0.3.12-rc5
v0.3.12-rc4
v0.3.12-rc3
v0.3.12-rc2
v0.3.12-rc1
v0.3.11
v0.3.11-rc4
v0.3.11-rc3
v0.3.11-rc2
v0.3.11-rc1
v0.3.10
v0.3.10-rc1
v0.3.9
v0.3.8
v0.3.7
v0.3.7-rc6
v0.3.7-rc5
v0.3.7-rc4
v0.3.7-rc3
v0.3.7-rc2
v0.3.7-rc1
v0.3.6
v0.3.5
v0.3.4
v0.3.3
v0.3.2
v0.3.1
v0.3.0
v0.2.8
v0.2.8-rc2
v0.2.8-rc1
v0.2.7
v0.2.6
v0.2.5
v0.2.4
v0.2.3
v0.2.2
v0.2.2-rc2
v0.2.2-rc1
v0.2.1
v0.2.0
v0.1.49-rc14
v0.1.49-rc13
v0.1.49-rc12
v0.1.49-rc11
v0.1.49-rc10
v0.1.49-rc9
v0.1.49-rc8
v0.1.49-rc7
v0.1.49-rc6
v0.1.49-rc4
v0.1.49-rc5
v0.1.49-rc3
v0.1.49-rc2
v0.1.49-rc1
v0.1.48
v0.1.47
v0.1.46
v0.1.45-rc5
v0.1.45
v0.1.45-rc4
v0.1.45-rc3
v0.1.45-rc2
v0.1.45-rc1
v0.1.44
v0.1.43
v0.1.42
v0.1.41
v0.1.40
v0.1.40-rc1
v0.1.39
v0.1.39-rc2
v0.1.39-rc1
v0.1.38
v0.1.37
v0.1.36
v0.1.35
v0.1.35-rc1
v0.1.34
v0.1.34-rc1
v0.1.33
v0.1.33-rc7
v0.1.33-rc6
v0.1.33-rc5
v0.1.33-rc4
v0.1.33-rc3
v0.1.33-rc2
v0.1.33-rc1
v0.1.32
v0.1.32-rc2
v0.1.32-rc1
v0.1.31
v0.1.30
v0.1.29
v0.1.28
v0.1.27
v0.1.26
v0.1.25
v0.1.24
v0.1.23
v0.1.22
v0.1.21
v0.1.20
v0.1.19
v0.1.18
v0.1.17
v0.1.16
v0.1.15
v0.1.14
v0.1.13
v0.1.12
v0.1.11
v0.1.10
v0.1.9
v0.1.8
v0.1.7
v0.1.6
v0.1.5
v0.1.4
v0.1.3
v0.1.2
v0.1.1
v0.1.0
v0.0.21
v0.0.20
v0.0.19
v0.0.18
v0.0.17
v0.0.16
v0.0.15
v0.0.14
v0.0.13
v0.0.12
v0.0.11
v0.0.10
v0.0.9
v0.0.8
v0.0.7
v0.0.6
v0.0.5
v0.0.4
v0.0.3
v0.0.2
v0.0.1
Labels
Clear labels
amd
api
app
bug
build
cli
cloud
compatibility
context-length
create
docker
documentation
embeddings
feature request
feedback wanted
good first issue
gpt-oss
gpu
harmony
help wanted
image
install
intel
js
launch
linux
macos
memory
mlx
model
needs more info
networking
nvidia
ollama.com
performance
pull-request
python
question
registry
rendering
thinking
tools
top
vulkan
windows
wsl
Mirrored from GitHub Pull Request
Milestone
No items
No Milestone
Projects
Clear projects
No project
No Assignees
Notifications
Due Date
No due date set.
Dependencies
No dependencies set.
Reference: github-starred/ollama#2115
Reference in New Issue
Block a user
Blocking a user prevents them from interacting with repositories, such as opening or commenting on pull requests or issues. Learn more about blocking a user.
Delete Branch "%!s()"
Deleting a branch is permanent. Although the deleted branch may continue to exist for a short time before it actually gets removed, it CANNOT be undone in most cases. Continue?
Originally created by @systerchristian on GitHub (Apr 1, 2024).
Original GitHub issue: https://github.com/ollama/ollama/issues/3431
Originally assigned to: @dhiltgen on GitHub.
What is the issue?
Getting a "CUDA Error: out of memory error" with command-r after message is returned. I am seeing this with Open Web-UI. Error is after it responds to a message. It happens every time.
Here is the tail end of the log file.
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = command-r
llama_model_loader: - kv 1: general.name str = c4ai-command-r-v01
llama_model_loader: - kv 2: command-r.block_count u32 = 40
llama_model_loader: - kv 3: command-r.context_length u32 = 131072
llama_model_loader: - kv 4: command-r.embedding_length u32 = 8192
llama_model_loader: - kv 5: command-r.feed_forward_length u32 = 22528
llama_model_loader: - kv 6: command-r.attention.head_count u32 = 64
llama_model_loader: - kv 7: command-r.attention.head_count_kv u32 = 64
llama_model_loader: - kv 8: command-r.rope.freq_base f32 = 8000000.000000
llama_model_loader: - kv 9: command-r.attention.layer_norm_epsilon f32 = 0.000010
llama_model_loader: - kv 10: general.file_type u32 = 12
llama_model_loader: - kv 11: command-r.logit_scale f32 = 0.062500
llama_model_loader: - kv 12: command-r.rope.scaling.type str = none
llama_model_loader: - kv 13: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 14: tokenizer.ggml.tokens arr[str,256000] = ["", "", "", "", ...
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,256000] = [3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, ...
llama_model_loader: - kv 16: tokenizer.ggml.merges arr[str,253333] = ["Ġ Ġ", "Ġ t", "e r", "i n", "Ġ a...
llama_model_loader: - kv 17: tokenizer.ggml.bos_token_id u32 = 5
llama_model_loader: - kv 18: tokenizer.ggml.eos_token_id u32 = 255001
llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 0
llama_model_loader: - kv 20: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 21: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 22: general.quantization_version u32 = 2
llama_model_loader: - type f32: 41 tensors
llama_model_loader: - type q3_K: 160 tensors
llama_model_loader: - type q4_K: 116 tensors
llama_model_loader: - type q5_K: 4 tensors
llama_model_loader: - type q6_K: 1 tensors
llm_load_vocab: special tokens definition check successful ( 1008/256000 ).
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = command-r
llm_load_print_meta: vocab type = BPE
llm_load_print_meta: n_vocab = 256000
llm_load_print_meta: n_merges = 253333
llm_load_print_meta: n_ctx_train = 131072
llm_load_print_meta: n_embd = 8192
llm_load_print_meta: n_head = 64
llm_load_print_meta: n_head_kv = 64
llm_load_print_meta: n_layer = 40
llm_load_print_meta: n_rot = 128
llm_load_print_meta: n_embd_head_k = 128
llm_load_print_meta: n_embd_head_v = 128
llm_load_print_meta: n_gqa = 1
llm_load_print_meta: n_embd_k_gqa = 8192
llm_load_print_meta: n_embd_v_gqa = 8192
llm_load_print_meta: f_norm_eps = 1.0e-05
llm_load_print_meta: f_norm_rms_eps = 0.0e+00
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: f_logit_scale = 6.2e-02
llm_load_print_meta: n_ff = 22528
llm_load_print_meta: n_expert = 0
llm_load_print_meta: n_expert_used = 0
llm_load_print_meta: causal attn = 1
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 0
llm_load_print_meta: rope scaling = none
llm_load_print_meta: freq_base_train = 8000000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_yarn_orig_ctx = 131072
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: ssm_d_conv = 0
llm_load_print_meta: ssm_d_inner = 0
llm_load_print_meta: ssm_d_state = 0
llm_load_print_meta: ssm_dt_rank = 0
llm_load_print_meta: model type = 35B
llm_load_print_meta: model ftype = Q3_K - Medium
llm_load_print_meta: model params = 34.98 B
llm_load_print_meta: model size = 16.40 GiB (4.03 BPW)
llm_load_print_meta: general.name = c4ai-command-r-v01
llm_load_print_meta: BOS token = 5 '<BOS_TOKEN>'
llm_load_print_meta: EOS token = 255001 '<|END_OF_TURN_TOKEN|>'
llm_load_print_meta: PAD token = 0 ''
llm_load_print_meta: LF token = 136 'Ä'
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: CUDA_USE_TENSOR_CORES: yes
ggml_cuda_init: found 1 CUDA devices:
Device 0: NVIDIA GeForce RTX 4090, compute capability 8.9, VMM: yes
llm_load_tensors: ggml ctx size = 0.25 MiB
llm_load_tensors: offloading 40 repeating layers to GPU
llm_load_tensors: offloading non-repeating layers to GPU
llm_load_tensors: offloaded 41/41 layers to GPU
llm_load_tensors: CPU buffer size = 1640.62 MiB
llm_load_tensors: CUDA0 buffer size = 16791.91 MiB
.....................................................................................
llama_new_context_with_model: n_ctx = 2048
llama_new_context_with_model: n_batch = 512
llama_new_context_with_model: n_ubatch = 512
llama_new_context_with_model: freq_base = 8000000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init: CUDA0 KV buffer size = 2560.00 MiB
llama_new_context_with_model: KV self size = 2560.00 MiB, K (f16): 1280.00 MiB, V (f16): 1280.00 MiB
llama_new_context_with_model: CUDA_Host output buffer size = 516.00 MiB
llama_new_context_with_model: CUDA0 compute buffer size = 516.00 MiB
llama_new_context_with_model: CUDA_Host compute buffer size = 20.00 MiB
llama_new_context_with_model: graph nodes = 1245
llama_new_context_with_model: graph splits = 2
{"function":"initialize","level":"INFO","line":444,"msg":"initializing slots","n_slots":1,"tid":"240","timestamp":1711936563}
{"function":"initialize","level":"INFO","line":456,"msg":"new slot","n_ctx_slot":2048,"slot_id":0,"tid":"240","timestamp":1711936563}
time=2024-03-31T18:56:03.805-07:00 level=INFO source=dyn_ext_server.go:159 msg="Starting llama main loop"
{"function":"update_slots","level":"INFO","line":1572,"msg":"all slots are idle and system prompt is empty, clear the KV cache","tid":"35172","timestamp":1711936563}
[GIN] 2024/03/31 - 18:56:03 | 200 | 9.3664512s | 192.168.50.60 | POST "/api/chat"
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":0,"tid":"35172","timestamp":1711936570}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":0,"n_past_se":0,"n_prompt_tokens_processed":8,"slot_id":0,"task_id":0,"tid":"35172","timestamp":1711936570}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":0,"slot_id":0,"task_id":0,"tid":"35172","timestamp":1711936570}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 353.31 ms / 8 tokens ( 44.16 ms per token, 22.64 tokens per second)","n_prompt_tokens_processed":8,"n_tokens_second":22.64332517230155,"slot_id":0,"t_prompt_processing":353.305,"t_token":44.163125,"task_id":0,"tid":"35172","timestamp":1711936571}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 739.95 ms / 29 runs ( 25.52 ms per token, 39.19 tokens per second)","n_decoded":29,"n_tokens_second":39.191678390384254,"slot_id":0,"t_token":25.515620689655172,"t_token_generation":739.953,"task_id":0,"tid":"35172","timestamp":1711936571}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1093.26 ms","slot_id":0,"t_prompt_processing":353.305,"t_token_generation":739.953,"t_total":1093.258,"task_id":0,"tid":"35172","timestamp":1711936571}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":37,"n_ctx":2048,"n_past":36,"n_system_tokens":0,"slot_id":0,"task_id":0,"tid":"35172","timestamp":1711936571,"truncated":false}
[GIN] 2024/03/31 - 18:56:11 | 200 | 1.0944379s | 192.168.50.60 | POST "/api/chat"
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":32,"tid":"35172","timestamp":1711936587}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":36,"n_past_se":0,"n_prompt_tokens_processed":10,"slot_id":0,"task_id":32,"tid":"35172","timestamp":1711936587}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":36,"slot_id":0,"task_id":32,"tid":"35172","timestamp":1711936587}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 289.25 ms / 10 tokens ( 28.92 ms per token, 34.57 tokens per second)","n_prompt_tokens_processed":10,"n_tokens_second":34.57228892753302,"slot_id":0,"t_prompt_processing":289.249,"t_token":28.9249,"task_id":32,"tid":"35172","timestamp":1711936589}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 1058.89 ms / 41 runs ( 25.83 ms per token, 38.72 tokens per second)","n_decoded":41,"n_tokens_second":38.719828046188034,"slot_id":0,"t_token":25.826560975609752,"t_token_generation":1058.889,"task_id":32,"tid":"35172","timestamp":1711936589}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1348.14 ms","slot_id":0,"t_prompt_processing":289.249,"t_token_generation":1058.889,"t_total":1348.138,"task_id":32,"tid":"35172","timestamp":1711936589}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":86,"n_ctx":2048,"n_past":86,"n_system_tokens":0,"slot_id":0,"task_id":32,"tid":"35172","timestamp":1711936589,"truncated":false}
[GIN] 2024/03/31 - 18:56:29 | 200 | 1.3504819s | 192.168.50.60 | POST "/api/chat"
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":76,"tid":"35172","timestamp":1711936611}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":84,"n_past_se":0,"n_prompt_tokens_processed":9,"slot_id":0,"task_id":76,"tid":"35172","timestamp":1711936611}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":84,"slot_id":0,"task_id":76,"tid":"35172","timestamp":1711936611}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 322.37 ms / 9 tokens ( 35.82 ms per token, 27.92 tokens per second)","n_prompt_tokens_processed":9,"n_tokens_second":27.918230604584796,"slot_id":0,"t_prompt_processing":322.37,"t_token":35.81888888888889,"task_id":76,"tid":"35172","timestamp":1711936612}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 1279.00 ms / 49 runs ( 26.10 ms per token, 38.31 tokens per second)","n_decoded":49,"n_tokens_second":38.31106079418048,"slot_id":0,"t_token":26.10212244897959,"t_token_generation":1279.004,"task_id":76,"tid":"35172","timestamp":1711936612}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 1601.37 ms","slot_id":0,"t_prompt_processing":322.37,"t_token_generation":1279.004,"t_total":1601.3739999999998,"task_id":76,"tid":"35172","timestamp":1711936612}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":138,"n_ctx":2048,"n_past":141,"n_system_tokens":0,"slot_id":0,"task_id":76,"tid":"35172","timestamp":1711936612,"truncated":false}
[GIN] 2024/03/31 - 18:56:52 | 200 | 1.6048077s | 192.168.50.60 | POST "/api/chat"
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":128,"tid":"35172","timestamp":1711936639}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":3,"n_past_se":0,"n_prompt_tokens_processed":48,"slot_id":0,"task_id":128,"tid":"35172","timestamp":1711936639}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":3,"slot_id":0,"task_id":128,"tid":"35172","timestamp":1711936639}
CUDA error: out of memory
current device: 0, in function alloc at C:\a\ollama\ollama\llm\llama.cpp\ggml-cuda.cu:532
cuMemSetAccess(pool_addr + pool_size, reserve_size, &access, 1)
GGML_ASSERT: C:\a\ollama\ollama\llm\llama.cpp\ggml-cuda.cu:193: !"CUDA error"
What did you expect to see?
Ollama to not crash. :)
Steps to reproduce
Any message sent while a Command-r model is loaded. Have been able to replicate with both latest and command-r:35b-v0.1-q3_K_M. Both models appear to work fine from the console.
Are there any recent changes that introduced the issue?
Nope. Tho I haven't been working with Ollama long.
OS
Windows
Architecture
x86
Platform
No response
Ollama version
0.1.30
GPU
Nvidia
GPU info
Sun Mar 31 19:13:31 2024
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 551.86 Driver Version: 551.86 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA GeForce RTX 4090 WDDM | 00000000:01:00.0 Off | Off |
| 0% 48C P8 18W / 450W | 2107MiB / 24564MiB | 8% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| 0 N/A N/A 6840 C+G ...ft Office\root\Office16\ONENOTE.EXE N/A |
| 0 N/A N/A 7160 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A |
| 0 N/A N/A 8716 C+G ...2txyewy\StartMenuExperienceHost.exe N/A |
| 0 N/A N/A 10984 C+G ...sair iCUE5 Software\QmlRenderer.exe N/A |
| 0 N/A N/A 14044 C+G ...wekyb3d8bbwe\XboxGameBarWidgets.exe N/A |
| 0 N/A N/A 15560 C+G ...\cef\cef.win7x64\steamwebhelper.exe N/A |
| 0 N/A N/A 16252 C+G ...on\123.0.2420.65\msedgewebview2.exe N/A |
| 0 N/A N/A 17256 C+G ...\LM-Studio\app-0.2.18\LM Studio.exe N/A |
| 0 N/A N/A 20184 C+G ...les\Microsoft OneDrive\OneDrive.exe N/A |
| 0 N/A N/A 21284 C+G ...siveControlPanel\SystemSettings.exe N/A |
| 0 N/A N/A 21684 C+G ...crosoft\Edge\Application\msedge.exe N/A |
| 0 N/A N/A 22008 C+G ...oogle\Chrome\Application\chrome.exe N/A |
| 0 N/A N/A 23756 C+G ...air\Corsair iCUE5 Software\iCUE.exe N/A |
| 0 N/A N/A 24504 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A |
| 0 N/A N/A 25468 C+G ...e Stream\88.0.0.0\GoogleDriveFS.exe N/A |
| 0 N/A N/A 25996 C+G ...__8wekyb3d8bbwe\WindowsTerminal.exe N/A |
| 0 N/A N/A 28544 C ...\LM-Studio\app-0.2.18\LM Studio.exe N/A |
| 0 N/A N/A 29564 C+G C:\Windows\explorer.exe N/A |
| 0 N/A N/A 30092 C+G ...CBS_cw5n1h2txyewy\TextInputHost.exe N/A |
| 0 N/A N/A 31484 C+G ...nt.CBS_cw5n1h2txyewy\SearchHost.exe N/A |
| 0 N/A N/A 35296 C+G ...__8wekyb3d8bbwe\Notepad\Notepad.exe N/A |
| 0 N/A N/A 36216 C+G ...on\123.0.2420.65\msedgewebview2.exe N/A |
+-----------------------------------------------------------------------------------------+
CPU
Intel
Other software
Open Web-ui
@aluhrs13 commented on GitHub (Apr 4, 2024):
+1, also hitting this I think. Can get detailed logs or anything if needed.
@FonzieBonzo commented on GitHub (Apr 4, 2024):
Same here using NVidia 2070 (8GB) on Windows 11.
Client using Ollama with openwebui from another (linux) pc
First query works; on the second I get the following (and Ollama stops):
llama_new_context_with_model: n_ctx = 2048
llama_new_context_with_model: n_batch = 512
llama_new_context_with_model: n_ubatch = 512
llama_new_context_with_model: freq_base = 10000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init: CUDA0 KV buffer size = 896.00 MiB
llama_new_context_with_model: KV self size = 896.00 MiB, K (f16): 448.00 MiB, V (f16): 448.00 MiB
llama_new_context_with_model: CUDA_Host output buffer size = 506.00 MiB
llama_new_context_with_model: CUDA0 compute buffer size = 506.00 MiB
llama_new_context_with_model: CUDA_Host compute buffer size = 10.00 MiB
llama_new_context_with_model: graph nodes = 957
llama_new_context_with_model: graph splits = 2
{"function":"initialize","level":"INFO","line":444,"msg":"initializing slots","n_slots":1,"tid":"10524","timestamp":1711990697}
{"function":"initialize","level":"INFO","line":456,"msg":"new slot","n_ctx_slot":2048,"slot_id":0,"tid":"10524","timestamp":1711990697}
time=2024-04-01T18:58:17.918+02:00 level=INFO source=dyn_ext_server.go:159 msg="Starting llama main loop"
{"function":"update_slots","level":"INFO","line":1572,"msg":"all slots are idle and system prompt is empty, clear the KV cache","tid":"3400","timestamp":1711990697}
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":0,"tid":"3400","timestamp":1711990697}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":0,"n_past_se":0,"n_prompt_tokens_processed":16,"slot_id":0,"task_id":0,"tid":"3400","timestamp":1711990697}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":0,"slot_id":0,"task_id":0,"tid":"3400","timestamp":1711990697}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 93.09 ms / 16 tokens ( 5.82 ms per token, 171.87 tokens per second)","n_prompt_tokens_processed":16,"n_tokens_second":171.87298586344693,"slot_id":0,"t_prompt_processing":93.092,"t_token":5.81825,"task_id":0,"tid":"3400","timestamp":1711990702}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 4741.50 ms / 254 runs ( 18.67 ms per token, 53.57 tokens per second)","n_decoded":254,"n_tokens_second":53.569590694582466,"slot_id":0,"t_token":18.667307086614173,"t_token_generation":4741.496,"task_id":0,"tid":"3400","timestamp":1711990702}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 4834.59 ms","slot_id":0,"t_prompt_processing":93.092,"t_token_generation":4741.496,"t_total":4834.588,"task_id":0,"tid":"3400","timestamp":1711990702}
{"function":"update_slots","level":"INFO","line":1642,"msg":"slot released","n_cache_tokens":270,"n_ctx":2048,"n_past":269,"n_system_tokens":0,"slot_id":0,"task_id":0,"tid":"3400","timestamp":1711990702,"truncated":false}
[GIN] 2024/04/01 - 18:58:22 | 200 | 9.0355119s | 10.0.0.4 | POST "/api/chat"
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":257,"tid":"3400","timestamp":1711990702}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1810,"msg":"slot progression","n_past":4,"n_past_se":0,"n_prompt_tokens_processed":50,"slot_id":0,"task_id":257,"tid":"3400","timestamp":1711990702}
{"function":"update_slots","level":"INFO","line":1834,"msg":"kv cache rm [p0, end)","p0":4,"slot_id":0,"task_id":257,"tid":"3400","timestamp":1711990702}
CUDA error: out of memory
current device: 0, in function alloc at C:\a\ollama\ollama\llm\llama.cpp\ggml-cuda.cu:532
cuMemSetAccess(pool_addr + pool_size, reserve_size, &access, 1)
GGML_ASSERT: C:\a\ollama\ollama\llm\llama.cpp\ggml-cuda.cu:193: !"CUDA error"
I have no experience with the language "Go", but if you need something to test...
@systerchristian commented on GitHub (Apr 4, 2024):
In my testing, I did see that forcing the following in the modelfile improved stability:
PARAMETER num_ctx 4196
PARAMETER num_gpu (any number greater than 41 worked)
@zigak1 commented on GitHub (Apr 4, 2024):
Have the same issue on Ubuntu 22.04, RTX 2080 Ti, nvidia drivers: 535.161.07 , CUDA version 12.2
Tried multiple different ollama versions, nvidia drivers, CUDA versions, and CUDA toolkit versions. Nothing helps.
time=2024-04-04T13:17:16.349+02:00 level=INFO source=images.go:806 msg="total blobs: 5"
time=2024-04-04T13:17:16.349+02:00 level=INFO source=images.go:813 msg="total unused blobs removed: 0"
time=2024-04-04T13:17:16.349+02:00 level=INFO source=routes.go:1110 msg="Listening on 127.0.0.1:11434 (version 0.1.29)"
time=2024-04-04T13:17:16.349+02:00 level=INFO source=payload_common.go:112 msg="Extracting dynamic libraries to /tmp/ollama3032254341/runners ..."
time=2024-04-04T13:17:18.918+02:00 level=INFO source=payload_common.go:139 msg="Dynamic LLM libraries [rocm_v60000 cpu cpu_avx2 cuda_v11 cpu_avx]"
time=2024-04-04T13:17:18.918+02:00 level=INFO source=gpu.go:77 msg="Detecting GPU type"
time=2024-04-04T13:17:18.918+02:00 level=INFO source=gpu.go:191 msg="Searching for GPU management library libnvidia-ml.so"
time=2024-04-04T13:17:18.920+02:00 level=INFO source=gpu.go:237 msg="Discovered GPU libraries: [/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.535.161.07]"
time=2024-04-04T13:17:18.927+02:00 level=INFO source=gpu.go:82 msg="Nvidia GPU detected"
time=2024-04-04T13:17:18.927+02:00 level=INFO source=cpu_common.go:11 msg="CPU has AVX2"
time=2024-04-04T13:17:18.932+02:00 level=INFO source=gpu.go:119 msg="CUDA Compute Capability detected: 7.5"
[GIN] 2024/04/04 - 13:18:00 | 200 | 39.782µs | 127.0.0.1 | HEAD "/"
[GIN] 2024/04/04 - 13:18:00 | 200 | 917.981µs | 127.0.0.1 | POST "/api/show"
[GIN] 2024/04/04 - 13:18:00 | 200 | 265.456µs | 127.0.0.1 | POST "/api/show"
time=2024-04-04T13:18:01.786+02:00 level=INFO source=cpu_common.go:11 msg="CPU has AVX2"
time=2024-04-04T13:18:01.787+02:00 level=INFO source=gpu.go:119 msg="CUDA Compute Capability detected: 7.5"
time=2024-04-04T13:18:01.787+02:00 level=INFO source=cpu_common.go:11 msg="CPU has AVX2"
time=2024-04-04T13:18:01.787+02:00 level=INFO source=gpu.go:119 msg="CUDA Compute Capability detected: 7.5"
time=2024-04-04T13:18:01.787+02:00 level=INFO source=cpu_common.go:11 msg="CPU has AVX2"
loading library /tmp/ollama3032254341/runners/cuda_v11/libext_server.so
time=2024-04-04T13:18:01.793+02:00 level=INFO source=dyn_ext_server.go:90 msg="Loading Dynamic llm server: /tmp/ollama3032254341/runners/cuda_v11/libext_server.so"
time=2024-04-04T13:18:01.793+02:00 level=INFO source=dyn_ext_server.go:150 msg="Initializing llama server"
ggml_init_cublas: GGML_CUDA_FORCE_MMQ: yes
ggml_init_cublas: CUDA_USE_TENSOR_CORES: no
ggml_init_cublas: found 1 CUDA devices:
Device 0: NVIDIA GeForce RTX 2080 Ti, compute capability 7.5, VMM: yes
llama_model_loader: loaded meta data with 21 key-value pairs and 164 tensors from /home/ml/.ollama/models/blobs/sha256:c1864a5eb19305c40519da12cc543519e48a0697ecd30e15d5ac228644957d12 (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = gemma
llama_model_loader: - kv 1: general.name str = gemma-2b-it
llama_model_loader: - kv 2: gemma.context_length u32 = 8192
llama_model_loader: - kv 3: gemma.block_count u32 = 18
llama_model_loader: - kv 4: gemma.embedding_length u32 = 2048
llama_model_loader: - kv 5: gemma.feed_forward_length u32 = 16384
llama_model_loader: - kv 6: gemma.attention.head_count u32 = 8
llama_model_loader: - kv 7: gemma.attention.head_count_kv u32 = 1
llama_model_loader: - kv 8: gemma.attention.key_length u32 = 256
llama_model_loader: - kv 9: gemma.attention.value_length u32 = 256
llama_model_loader: - kv 10: gemma.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 11: tokenizer.ggml.model str = llama
llama_model_loader: - kv 12: tokenizer.ggml.bos_token_id u32 = 2
llama_model_loader: - kv 13: tokenizer.ggml.eos_token_id u32 = 1
llama_model_loader: - kv 14: tokenizer.ggml.padding_token_id u32 = 0
llama_model_loader: - kv 15: tokenizer.ggml.unknown_token_id u32 = 3
llama_model_loader: - kv 16: tokenizer.ggml.tokens arr[str,256128] = ["", "", "", "", ...
llama_model_loader: - kv 17: tokenizer.ggml.scores arr[f32,256128] = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv 18: tokenizer.ggml.token_type arr[i32,256128] = [3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 19: general.quantization_version u32 = 2
llama_model_loader: - kv 20: general.file_type u32 = 2
llama_model_loader: - type f32: 37 tensors
llama_model_loader: - type q4_0: 126 tensors
llama_model_loader: - type q8_0: 1 tensors
llm_load_vocab: mismatch in special tokens definition ( 544/256128 vs 388/256128 ).
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = gemma
llm_load_print_meta: vocab type = SPM
llm_load_print_meta: n_vocab = 256128
llm_load_print_meta: n_merges = 0
llm_load_print_meta: n_ctx_train = 8192
llm_load_print_meta: n_embd = 2048
llm_load_print_meta: n_head = 8
llm_load_print_meta: n_head_kv = 1
llm_load_print_meta: n_layer = 18
llm_load_print_meta: n_rot = 256
llm_load_print_meta: n_embd_head_k = 256
llm_load_print_meta: n_embd_head_v = 256
llm_load_print_meta: n_gqa = 8
llm_load_print_meta: n_embd_k_gqa = 256
llm_load_print_meta: n_embd_v_gqa = 256
llm_load_print_meta: f_norm_eps = 0.0e+00
llm_load_print_meta: f_norm_rms_eps = 1.0e-06
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: n_ff = 16384
llm_load_print_meta: n_expert = 0
llm_load_print_meta: n_expert_used = 0
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 2
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 10000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_yarn_orig_ctx = 8192
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: model type = 2B
llm_load_print_meta: model ftype = Q4_0
llm_load_print_meta: model params = 2.51 B
llm_load_print_meta: model size = 1.56 GiB (5.34 BPW)
llm_load_print_meta: general.name = gemma-2b-it
llm_load_print_meta: BOS token = 2 ''
llm_load_print_meta: EOS token = 1 ''
llm_load_print_meta: UNK token = 3 ''
llm_load_print_meta: PAD token = 0 ''
llm_load_print_meta: LF token = 227 '<0x0A>'
llm_load_tensors: ggml ctx size = 0.13 MiB
llm_load_tensors: offloading 18 repeating layers to GPU
llm_load_tensors: offloading non-repeating layers to GPU
llm_load_tensors: offloaded 19/19 layers to GPU
llm_load_tensors: CPU buffer size = 531.52 MiB
llm_load_tensors: CUDA0 buffer size = 1594.93 MiB
.....................................................
llama_new_context_with_model: n_ctx = 2048
llama_new_context_with_model: freq_base = 10000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init: CUDA0 KV buffer size = 36.00 MiB
llama_new_context_with_model: KV self size = 36.00 MiB, K (f16): 18.00 MiB, V (f16): 18.00 MiB
llama_new_context_with_model: CUDA_Host input buffer size = 9.02 MiB
llama_new_context_with_model: CUDA0 compute buffer size = 504.25 MiB
llama_new_context_with_model: CUDA_Host compute buffer size = 4.00 MiB
llama_new_context_with_model: graph splits (measure): 2
CUDA error: an illegal memory access was encountered
current device: 0, in function ggml_backend_cuda_get_tensor_async at /go/src/github.com/jmorganca/ollama/llm/llama.cpp/ggml-cuda.cu:12223
cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0])
GGML_ASSERT: /go/src/github.com/jmorganca/ollama/llm/llama.cpp/ggml-cuda.cu:256: !"CUDA error"
Could not attach to process. If your uid matches the uid of the target
process, check the setting of /proc/sys/kernel/yama/ptrace_scope, or try
again as the root user. For more details, see /etc/sysctl.d/10-ptrace.conf
ptrace: Operation not permitted.
No stack.
The program is not being run.
SIGABRT: abort
PC=0x73ed1c8969fc m=7 sigcode=18446744073709551610
signal arrived during cgo execution
goroutine 50 gp=0xc0000aae00 m=7 mp=0xc000580008 [syscall]:
runtime.cgocall(0xebb2b0, 0xc00004c6f8)
runtime/cgocall.go:157 +0x4b fp=0xc00004c6d0 sp=0xc00004c698 pc=0x40a72b
github.com/jmorganca/ollama/llm._Cfunc_dyn_llama_server_init({0x73ecbc013ed0, 0x73ec96090e90, 0x73ec96080a20, 0x73ec960902e0, 0x73ec96095cb0, 0x73ec9608a940, 0x73ec96083650, 0x73ec96080aa0, 0x73ec960965b0, 0x73ec96095850, ...}, ...)
_cgo_gotypes.go:286 +0x45 fp=0xc00004c6f8 sp=0xc00004c6d0 pc=0xce5ca5
github.com/jmorganca/ollama/llm.newDynExtServer.func7(0xc00007e500, 0xc001a6f050)
github.com/jmorganca/ollama/llm/dyn_ext_server.go:154 +0x112 fp=0xc00004c838 sp=0xc00004c6f8 pc=0xce7352
github.com/jmorganca/ollama/llm.newDynExtServer({0xc0005c8000, 0x37}, {0xc00067eaf0, }, {, _, }, {0x0, 0x0, 0x0}, ...)
github.com/jmorganca/ollama/llm/dyn_ext_server.go:154 +0xb50 fp=0xc00004ca80 sp=0xc00004c838 pc=0xce6f90
github.com/jmorganca/ollama/llm.newLlmServer({{, _, }, {, }, {, }}, {, _}, {0x0, ...}, ...)
github.com/jmorganca/ollama/llm/llm.go:166 +0x4c5 fp=0xc00004cc40 sp=0xc00004ca80 pc=0xce3525
github.com/jmorganca/ollama/llm.New({0xc00067eaf0, 0x65}, {0x0, 0x0, 0x0}, {0x0, _, _}, {{0x0, 0x800, ...}, ...})
github.com/jmorganca/ollama/llm/llm.go:131 +0x90e fp=0xc00004ced8 sp=0xc00004cc40 pc=0xce2ece
github.com/jmorganca/ollama/server.load(0xc000003500?, 0xc000003500, {{0x0, 0x800, 0x200, 0x1, 0xffffffffffffffff, 0x0, 0x0, 0x1, ...}, ...}, ...)
github.com/jmorganca/ollama/server/routes.go:86 +0x325 fp=0xc00004d028 sp=0xc00004ced8 pc=0xe93c65
github.com/jmorganca/ollama/server.ChatHandler(0xc0000e8c00)
github.com/jmorganca/ollama/server/routes.go:1264 +0xa4b fp=0xc00004d730 sp=0xc00004d028 pc=0xe9f96b
github.com/gin-gonic/gin.(*Context).Next(0xc0000e8c00)
github.com/gin-gonic/gin@v1.9.1/context.go:174 +0x2b fp=0xc00004d750 sp=0xc00004d730 pc=0xe674eb
github.com/jmorganca/ollama/server.(*Server).GenerateRoutes.allowedHostsMiddleware.func3(0xc0000e8c00)
github.com/jmorganca/ollama/server/routes.go:1001 +0x115 fp=0xc00004d7a8 sp=0xc00004d750 pc=0xe9e0d5
github.com/gin-gonic/gin.(*Context).Next(...)
github.com/gin-gonic/gin@v1.9.1/context.go:174
github.com/gin-gonic/gin.CustomRecoveryWithWriter.func1(0xc0000e8c00)
github.com/gin-gonic/gin@v1.9.1/recovery.go:102 +0x7a fp=0xc00004d7f8 sp=0xc00004d7a8 pc=0xe743da
github.com/gin-gonic/gin.(*Context).Next(...)
github.com/gin-gonic/gin@v1.9.1/context.go:174
github.com/gin-gonic/gin.LoggerWithConfig.func1(0xc0000e8c00)
github.com/gin-gonic/gin@v1.9.1/logger.go:240 +0xdd fp=0xc00004d9a8 sp=0xc00004d7f8 pc=0xe7351d
github.com/gin-gonic/gin.(*Context).Next(...)
github.com/gin-gonic/gin@v1.9.1/context.go:174
github.com/gin-gonic/gin.(*Engine).handleHTTPRequest(0xc0005101a0, 0xc0000e8c00)
github.com/gin-gonic/gin@v1.9.1/gin.go:620 +0x66e fp=0xc00004db28 sp=0xc00004d9a8 pc=0xe72a0e
github.com/gin-gonic/gin.(*Engine).ServeHTTP(0xc0005101a0, {0x11654090, 0xc0006161c0}, 0xc0005230e0)
github.com/gin-gonic/gin@v1.9.1/gin.go:576 +0x1b2 fp=0xc00004db60 sp=0xc00004db28 pc=0xe721d2
net/http.serverHandler.ServeHTTP({0x11651f70?}, {0x11654090?, 0xc0006161c0?}, 0x6?)
net/http/server.go:3137 +0x8e fp=0xc00004db90 sp=0xc00004db60 pc=0x6fef4e
net/http.(*conn).serve(0xc00017a1b0, {0x11656448, 0xc0000c2f30})
net/http/server.go:2039 +0x5e8 fp=0xc00004dfb8 sp=0xc00004db90 pc=0x6fa308
net/http.(*Server).Serve.gowrap3()
net/http/server.go:3285 +0x28 fp=0xc00004dfe0 sp=0xc00004dfb8 pc=0x6ff768
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc00004dfe8 sp=0xc00004dfe0 pc=0x4742e1
created by net/http.(*Server).Serve in goroutine 1
net/http/server.go:3285 +0x4b4
goroutine 1 gp=0xc0000061c0 m=nil [IO wait]:
runtime.gopark(0xc00005d408?, 0x0?, 0xc0?, 0x61?, 0xc0008c3868?)
runtime/proc.go:402 +0xce fp=0xc0008c3830 sp=0xc0008c3810 pc=0x44160e
runtime.netpollblock(0xc0008c38c8?, 0x409ec6?, 0x0?)
runtime/netpoll.go:573 +0xf7 fp=0xc0008c3868 sp=0xc0008c3830 pc=0x43a377
internal/poll.runtime_pollWait(0x73ed1ca566d0, 0x72)
runtime/netpoll.go:345 +0x85 fp=0xc0008c3888 sp=0xc0008c3868 pc=0x46e9e5
internal/poll.(*pollDesc).wait(0x3?, 0x3fe?, 0x0)
internal/poll/fd_poll_runtime.go:84 +0x27 fp=0xc0008c38b0 sp=0xc0008c3888 pc=0x5030a7
internal/poll.(*pollDesc).waitRead(...)
internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Accept(0xc0000e4400)
internal/poll/fd_unix.go:611 +0x2ac fp=0xc0008c3958 sp=0xc0008c38b0 pc=0x50844c
net.(*netFD).accept(0xc0000e4400)
net/fd_unix.go:172 +0x29 fp=0xc0008c3a10 sp=0xc0008c3958 pc=0x597c49
net.(*TCPListener).accept(0xc000514820)
net/tcpsock_posix.go:159 +0x1e fp=0xc0008c3a38 sp=0xc0008c3a10 pc=0x5adb7e
net.(*TCPListener).Accept(0xc000514820)
net/tcpsock.go:327 +0x30 fp=0xc0008c3a68 sp=0xc0008c3a38 pc=0x5acd70
net/http.(*onceCloseListener).Accept(0xc00017a1b0?)
:1 +0x24 fp=0xc0008c3a80 sp=0xc0008c3a68 pc=0x7219a4
net/http.(*Server).Serve(0xc0004ae000, {0x11653e20, 0xc000514820})
net/http/server.go:3255 +0x33e fp=0xc0008c3bb0 sp=0xc0008c3a80 pc=0x6ff37e
github.com/jmorganca/ollama/server.Serve({0x11653e20, 0xc000514820})
github.com/jmorganca/ollama/server/routes.go:1137 +0x4bf fp=0xc0008c3cc0 sp=0xc0008c3bb0 pc=0xe9e61f
github.com/jmorganca/ollama/cmd.RunServer(0xc0000e8a00?, {0x11da2340?, 0x4?, 0x1050bd5?})
github.com/jmorganca/ollama/cmd/cmd.go:787 +0x1b9 fp=0xc0008c3d58 sp=0xc0008c3cc0 pc=0xeb2219
github.com/spf13/cobra.(*Command).execute(0xc0000d0f08, {0x11da2340, 0x0, 0x0})
github.com/spf13/cobra@v1.7.0/command.go:940 +0x882 fp=0xc0008c3e78 sp=0xc0008c3d58 pc=0x794922
github.com/spf13/cobra.(*Command).ExecuteC(0xc0000d0308)
github.com/spf13/cobra@v1.7.0/command.go:1068 +0x3a5 fp=0xc0008c3f30 sp=0xc0008c3e78 pc=0x795165
github.com/spf13/cobra.(*Command).Execute(...)
github.com/spf13/cobra@v1.7.0/command.go:992
github.com/spf13/cobra.(*Command).ExecuteContext(...)
github.com/spf13/cobra@v1.7.0/command.go:985
main.main()
github.com/jmorganca/ollama/main.go:11 +0x4d fp=0xc0008c3f50 sp=0xc0008c3f30 pc=0xeba3cd
runtime.main()
runtime/proc.go:271 +0x29d fp=0xc0008c3fe0 sp=0xc0008c3f50 pc=0x4411dd
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0008c3fe8 sp=0xc0008c3fe0 pc=0x4742e1
goroutine 2 gp=0xc000006c40 m=nil [force gc (idle)]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000074fa8 sp=0xc000074f88 pc=0x44160e
runtime.goparkunlock(...)
runtime/proc.go:408
runtime.forcegchelper()
runtime/proc.go:326 +0xb3 fp=0xc000074fe0 sp=0xc000074fa8 pc=0x441493
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc000074fe8 sp=0xc000074fe0 pc=0x4742e1
created by runtime.init.6 in goroutine 1
runtime/proc.go:314 +0x1a
goroutine 3 gp=0xc000007180 m=nil [GC sweep wait]:
runtime.gopark(0x1?, 0x0?, 0x0?, 0x0?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000075780 sp=0xc000075760 pc=0x44160e
runtime.goparkunlock(...)
runtime/proc.go:408
runtime.bgsweep(0xc000046070)
runtime/mgcsweep.go:318 +0xdf fp=0xc0000757c8 sp=0xc000075780 pc=0x42cbbf
runtime.gcenable.gowrap1()
runtime/mgc.go:203 +0x25 fp=0xc0000757e0 sp=0xc0000757c8 pc=0x4214a5
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000757e8 sp=0xc0000757e0 pc=0x4742e1
created by runtime.gcenable in goroutine 1
runtime/mgc.go:203 +0x66
goroutine 4 gp=0xc000007340 m=nil [GC scavenge wait]:
runtime.gopark(0x88b129d?, 0x881f5b3?, 0x0?, 0x0?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000075f78 sp=0xc000075f58 pc=0x44160e
runtime.goparkunlock(...)
runtime/proc.go:408
runtime.(*scavengerState).park(0x11d3c300)
runtime/mgcscavenge.go:425 +0x49 fp=0xc000075fa8 sp=0xc000075f78 pc=0x42a549
runtime.bgscavenge(0xc000046070)
runtime/mgcscavenge.go:658 +0x59 fp=0xc000075fc8 sp=0xc000075fa8 pc=0x42aaf9
runtime.gcenable.gowrap2()
runtime/mgc.go:204 +0x25 fp=0xc000075fe0 sp=0xc000075fc8 pc=0x421445
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc000075fe8 sp=0xc000075fe0 pc=0x4742e1
created by runtime.gcenable in goroutine 1
runtime/mgc.go:204 +0xa5
goroutine 5 gp=0xc000007c00 m=nil [finalizer wait]:
runtime.gopark(0xc000074648?, 0x414865?, 0xa8?, 0x1?, 0xc0000061c0?)
runtime/proc.go:402 +0xce fp=0xc000074620 sp=0xc000074600 pc=0x44160e
runtime.runfinq()
runtime/mfinal.go:194 +0x107 fp=0xc0000747e0 sp=0xc000074620 pc=0x4204e7
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000747e8 sp=0xc0000747e0 pc=0x4742e1
created by runtime.createfing in goroutine 1
runtime/mfinal.go:164 +0x3d
goroutine 6 gp=0xc00015d6c0 m=nil [GC worker (idle)]:
runtime.gopark(0x4d56728c96dd?, 0x1?, 0x9c?, 0x6c?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000076750 sp=0xc000076730 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc0000767e0 sp=0xc000076750 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000767e8 sp=0xc0000767e0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 7 gp=0xc00015d880 m=nil [GC worker (idle)]:
runtime.gopark(0x4d56728c968b?, 0x1?, 0xdd?, 0x34?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000076f50 sp=0xc000076f30 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc000076fe0 sp=0xc000076f50 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc000076fe8 sp=0xc000076fe0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 8 gp=0xc00015da40 m=nil [GC worker (idle)]:
runtime.gopark(0x4d56728c9760?, 0x3?, 0xde?, 0x4d?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000077750 sp=0xc000077730 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc0000777e0 sp=0xc000077750 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000777e8 sp=0xc0000777e0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 9 gp=0xc00015dc00 m=nil [GC worker (idle)]:
runtime.gopark(0x4d56728c9745?, 0x3?, 0xe1?, 0xc1?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000077f50 sp=0xc000077f30 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc000077fe0 sp=0xc000077f50 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc000077fe8 sp=0xc000077fe0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 18 gp=0xc000500000 m=nil [GC worker (idle)]:
runtime.gopark(0x4d56728c97ef?, 0x3?, 0x44?, 0x27?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000070750 sp=0xc000070730 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc0000707e0 sp=0xc000070750 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000707e8 sp=0xc0000707e0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 19 gp=0xc0005001c0 m=nil [GC worker (idle)]:
runtime.gopark(0x4d5665686167?, 0x1?, 0x1f?, 0x1b?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000070f50 sp=0xc000070f30 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc000070fe0 sp=0xc000070f50 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc000070fe8 sp=0xc000070fe0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 20 gp=0xc000500380 m=nil [GC worker (idle)]:
runtime.gopark(0x4d56728c96b7?, 0x3?, 0x8?, 0x96?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc000071750 sp=0xc000071730 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc0000717e0 sp=0xc000071750 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000717e8 sp=0xc0000717e0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 34 gp=0xc0000aa380 m=nil [GC worker (idle)]:
runtime.gopark(0x4d56728c9ddb?, 0x3?, 0x71?, 0x15?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc0000bc750 sp=0xc0000bc730 pc=0x44160e
runtime.gcBgMarkWorker()
runtime/mgc.go:1310 +0xe5 fp=0xc0000bc7e0 sp=0xc0000bc750 pc=0x423585
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000bc7e8 sp=0xc0000bc7e0 pc=0x4742e1
created by runtime.gcBgMarkStartWorkers in goroutine 1
runtime/mgc.go:1234 +0x1c
goroutine 10 gp=0xc000500540 m=nil [select, locked to thread]:
runtime.gopark(0xc0000befa8?, 0x2?, 0xa9?, 0x18?, 0xc0000bef94?)
runtime/proc.go:402 +0xce fp=0xc0000bee38 sp=0xc0000bee18 pc=0x44160e
runtime.selectgo(0xc0000befa8, 0xc0000bef90, 0x0?, 0x0, 0x0?, 0x1)
runtime/select.go:327 +0x725 fp=0xc0000bef58 sp=0xc0000bee38 pc=0x452a65
runtime.ensureSigM.func1()
runtime/signal_unix.go:1034 +0x19f fp=0xc0000befe0 sp=0xc0000bef58 pc=0x46b71f
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000befe8 sp=0xc0000befe0 pc=0x4742e1
created by runtime.ensureSigM in goroutine 1
runtime/signal_unix.go:1017 +0xc8
goroutine 11 gp=0xc000500700 m=9 mp=0xc000680008 [syscall]:
runtime.notetsleepg(0x11da2fc0, 0xffffffffffffffff)
runtime/lock_futex.go:246 +0x29 fp=0xc0000bf7a0 sp=0xc0000bf778 pc=0x412e89
os/signal.signal_recv()
runtime/sigqueue.go:152 +0x29 fp=0xc0000bf7c0 sp=0xc0000bf7a0 pc=0x470d49
os/signal.loop()
os/signal/signal_unix.go:23 +0x13 fp=0xc0000bf7e0 sp=0xc0000bf7c0 pc=0x723d53
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000bf7e8 sp=0xc0000bf7e0 pc=0x4742e1
created by os/signal.Notify.func1.1 in goroutine 1
os/signal/signal.go:151 +0x1f
goroutine 21 gp=0xc0000aa540 m=nil [chan receive]:
runtime.gopark(0x0?, 0x0?, 0x0?, 0x0?, 0x0?)
runtime/proc.go:402 +0xce fp=0xc0000ba720 sp=0xc0000ba700 pc=0x44160e
runtime.chanrecv(0xc0000d6d20, 0x0, 0x1)
runtime/chan.go:583 +0x3bf fp=0xc0000ba798 sp=0xc0000ba720 pc=0x40cd3f
runtime.chanrecv1(0x0?, 0x0?)
runtime/chan.go:442 +0x12 fp=0xc0000ba7c0 sp=0xc0000ba798 pc=0x40c952
github.com/jmorganca/ollama/server.Serve.func2()
github.com/jmorganca/ollama/server/routes.go:1119 +0x19 fp=0xc0000ba7e0 sp=0xc0000ba7c0 pc=0xe9e699
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000ba7e8 sp=0xc0000ba7e0 pc=0x4742e1
created by github.com/jmorganca/ollama/server.Serve in goroutine 1
github.com/jmorganca/ollama/server/routes.go:1118 +0x40e
goroutine 36 gp=0xc0000aafc0 m=nil [IO wait]:
runtime.gopark(0x10?, 0x10?, 0xf0?, 0xd5?, 0xb?)
runtime/proc.go:402 +0xce fp=0xc0000bd5a8 sp=0xc0000bd588 pc=0x44160e
runtime.netpollblock(0x4863f8?, 0x409ec6?, 0x0?)
runtime/netpoll.go:573 +0xf7 fp=0xc0000bd5e0 sp=0xc0000bd5a8 pc=0x43a377
internal/poll.runtime_pollWait(0x73ed1ca565d8, 0x72)
runtime/netpoll.go:345 +0x85 fp=0xc0000bd600 sp=0xc0000bd5e0 pc=0x46e9e5
internal/poll.(*pollDesc).wait(0xc00017e680?, 0xc0000c3031?, 0x0)
internal/poll/fd_poll_runtime.go:84 +0x27 fp=0xc0000bd628 sp=0xc0000bd600 pc=0x5030a7
internal/poll.(*pollDesc).waitRead(...)
internal/poll/fd_poll_runtime.go:89
internal/poll.(*FD).Read(0xc00017e680, {0xc0000c3031, 0x1, 0x1})
internal/poll/fd_unix.go:164 +0x27a fp=0xc0000bd6c0 sp=0xc0000bd628 pc=0x50439a
net.(*netFD).Read(0xc00017e680, {0xc0000c3031?, 0xc0000bd748?, 0x470a50?})
net/fd_posix.go:55 +0x25 fp=0xc0000bd708 sp=0xc0000bd6c0 pc=0x595c65
net.(*conn).Read(0xc0000c6130, {0xc0000c3031?, 0x0?, 0x11da2340?})
net/net.go:179 +0x45 fp=0xc0000bd750 sp=0xc0000bd708 pc=0x5a4ac5
net.(*TCPConn).Read(0x11caba40?, {0xc0000c3031?, 0xc0000c84c0?, 0x7ebe40?})
:1 +0x25 fp=0xc0000bd780 sp=0xc0000bd750 pc=0x5b6145
net/http.(*connReader).backgroundRead(0xc0000c3020)
net/http/server.go:681 +0x37 fp=0xc0000bd7c8 sp=0xc0000bd780 pc=0x6f4277
net/http.(*connReader).startBackgroundRead.gowrap2()
net/http/server.go:677 +0x25 fp=0xc0000bd7e0 sp=0xc0000bd7c8 pc=0x6f41a5
runtime.goexit({})
runtime/asm_amd64.s:1695 +0x1 fp=0xc0000bd7e8 sp=0xc0000bd7e0 pc=0x4742e1
created by net/http.(*connReader).startBackgroundRead in goroutine 50
net/http/server.go:677 +0xba
rax 0x0
rbx 0x73ecc6ffd640
rcx 0x73ed1c8969fc
rdx 0x6
rdi 0x348c
rsi 0x3492
rbp 0x3492
rsp 0x73ecc6ffc150
r8 0x73ecc6ffc220
r9 0x73ecc6ffc1c0
r10 0x8
r11 0x246
r12 0x6
r13 0x16
r14 0x73ecc4475f90
r15 0x73ec97f97440
rip 0x73ed1c8969fc
rflags 0x246
cs 0x33
fs 0x0
gs 0x0
@FonzieBonzo commented on GitHub (Apr 7, 2024):
Where can I set these parameters, or is it not possible with the Windows version of Ollama in combination with Open WebUI?
@ghost commented on GitHub (Apr 10, 2024):
Previously it was running well, but after some time it started to show the same error:
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)). Then I checked the "C:\Users\<username>\AppData\Local\Ollama\server.log" file and found the following error at the end of the file:
Then I tried the following solution:
modifying the values of num_ctx & num_gpu, which resolved it,
but after this it is consuming too much RAM — about 90% of my RAM! — but yeah, it's running 👍
@systerchristian commented on GitHub (Apr 11, 2024):
Warning. I have had mixed results with my "fix" (which I don't think is actually a fix). Stability is still less than desired. It will frequently get extremely long processing times.
I've moved to KoboldAI for using Command-r. It doesn't seem to have these issues. No clue why, but it works, is faster and stable.
snippet of logs from my last round of testing:(the message it was processing was "Hello there" and a regenerate command)
{"function":"launch_slot_with_data","level":"INFO","line":829,"msg":"slot is processing task","slot_id":0,"task_id":95,"tid":"30204","timestamp":1712868661}
{"function":"update_slots","ga_i":0,"level":"INFO","line":1812,"msg":"slot progression","n_past":3,"n_past_se":0,"n_prompt_tokens_processed":44,"slot_id":0,"task_id":95,"tid":"30204","timestamp":1712868661}
{"function":"update_slots","level":"INFO","line":1836,"msg":"kv cache rm [p0, end)","p0":3,"slot_id":0,"task_id":95,"tid":"30204","timestamp":1712868661}
{"function":"print_timings","level":"INFO","line":272,"msg":"prompt eval time = 118765.66 ms / 44 tokens ( 2699.22 ms per token, 0.37 tokens per second)","n_prompt_tokens_processed":44,"n_tokens_second":0.370477465800762,"slot_id":0,"t_prompt_processing":118765.658,"t_token":2699.2194999999997,"task_id":95,"tid":"30204","timestamp":1712868782}
{"function":"print_timings","level":"INFO","line":286,"msg":"generation eval time = 2988.53 ms / 64 runs ( 46.70 ms per token, 21.42 tokens per second)","n_decoded":64,"n_tokens_second":21.415239485953947,"slot_id":0,"t_token":46.69571875,"t_token_generation":2988.526,"task_id":95,"tid":"30204","timestamp":1712868782}
{"function":"print_timings","level":"INFO","line":295,"msg":" total time = 121754.18 ms","slot_id":0,"t_prompt_processing":118765.658,"t_token_generation":2988.526,"t_total":121754.184,"task_id":95,"tid":"30204","timestamp":1712868782}
{"function":"update_slots","level":"INFO","line":1644,"msg":"slot released","n_cache_tokens":111,"n_ctx":4224,"n_past":110,"n_system_tokens":0,"slot_id":0,"task_id":95,"tid":"30204","timestamp":1712868782,"truncated":false}
[GIN] 2024/04/11 - 13:53:02 | 200 | 2m1s | 192.168.50.32 | POST "/v1/chat/completions"
@dhiltgen commented on GitHub (Jun 1, 2024):
Please give the latest version a try. We've made improvements in the memory prediction, however there's still some more work to do on Windows VRAM visibility tracked in #4599
@dhiltgen commented on GitHub (Jun 22, 2024):
The latest release (0.1.45) has additional fixes that should help. If you're still seeing OOM crashes, please share an updated server log and I'll reopen the issue.