WhisperDartBindings class

Bindings for src/whisper4dart.h.

Regenerate bindings with flutter pub run ffigen --config ffigen.yaml.

Constructors

WhisperDartBindings.new(DynamicLibrary dynamicLibrary)
The symbols are looked up in dynamicLibrary.
WhisperDartBindings.fromLookup(Pointer<T> lookup<T extends NativeType>(String symbolName))
The symbols are looked up with lookup.
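The generated class only wires up symbol lookup; a typical setup opens the native library once and constructs the bindings from it. The library file names and the import path below are assumptions for illustration, not part of the generated API.

```dart
import 'dart:ffi';
import 'dart:io' show Platform;

// Assumed location of this generated file; adjust to the actual package layout.
import 'package:whisper4dart/src/whisper4dart_bindings_generated.dart';

WhisperDartBindings loadWhisperBindings() {
  // Hypothetical library names; they must match how the native whisper/ggml
  // library is packaged for each platform.
  final DynamicLibrary lib;
  if (Platform.isIOS || Platform.isMacOS) {
    lib = DynamicLibrary.process(); // symbols linked into the host process
  } else if (Platform.isWindows) {
    lib = DynamicLibrary.open('whisper.dll');
  } else {
    lib = DynamicLibrary.open('libwhisper.so'); // Android / Linux
  }
  // Every symbol declared in src/whisper4dart.h is resolved from `lib`.
  return WhisperDartBindings(lib);
}
```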

Properties

GGML_TENSOR_SIZE int
getter/setter pair
hashCode int
The hash code for this object.
no setter, inherited
runtimeType Type
A representation of the runtime type of the object.
no setter, inherited

Methods

ggml_abort(Pointer<Char> file, int line, Pointer<Char> fmt) → void
ggml_abs(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_abs_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_acc(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
dst = a; view(dst, nb1, nb2, nb3, offset) += b; return dst
ggml_acc_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
ggml_add(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add1(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add1_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add_cast(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int type) Pointer<ggml_tensor>
ggml_add_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add_rel_pos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) Pointer<ggml_tensor>
used in sam
ggml_add_rel_pos_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) Pointer<ggml_tensor>
ggml_arange(Pointer<ggml_context> ctx, double start, double stop, double step) Pointer<ggml_tensor>
ggml_are_same_shape(Pointer<ggml_tensor> t0, Pointer<ggml_tensor> t1) bool
ggml_are_same_stride(Pointer<ggml_tensor> t0, Pointer<ggml_tensor> t1) bool
ggml_argmax(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
argmax along rows
ggml_argsort(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int order) Pointer<ggml_tensor>
ggml_backend_alloc_buffer(ggml_backend_t backend, int size) ggml_backend_buffer_t
ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, int value) → void
ggml_backend_buffer_free(ggml_backend_buffer_t buffer) → void
ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) int
ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor) int
ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) Pointer<Void>
ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) int
ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) int
ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) ggml_backend_buffer_type_t
ggml_backend_buffer_get_usage(ggml_backend_buffer_t buffer) int
ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor) → void
ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) bool
ggml_backend_buffer_name(ggml_backend_buffer_t buffer) Pointer<Char>
ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) → void
ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, int usage) → void
ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, int size) ggml_backend_buffer_t
ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) int
ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, Pointer<ggml_tensor> tensor) int
ggml_backend_buft_get_device(ggml_backend_buffer_type_t buft) ggml_backend_dev_t
ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) int
ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) bool
ggml_backend_buft_name(ggml_backend_buffer_type_t buft) Pointer<Char>
Backend buffer type
ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, Pointer<ggml_cgraph> graph, ggml_backend_eval_callback callback, Pointer<Void> user_data) bool
Compare the output of two backends
ggml_backend_cpu_buffer_from_ptr(Pointer<Void> ptr, int size) ggml_backend_buffer_t
CPU buffer types are always available
ggml_backend_cpu_buffer_type() ggml_backend_buffer_type_t
ggml_backend_cpu_init() ggml_backend_t
CPU backend
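A minimal sketch of bringing up the CPU backend through these bindings: create it, pin the thread count, and free it when done. The thread count of 4 is arbitrary, and `bindings` is assumed to be an already-constructed WhisperDartBindings.

```dart
import 'dart:ffi';

// Assumes the generated bindings (WhisperDartBindings) are imported.
void cpuBackendSketch(WhisperDartBindings bindings) {
  final backend = bindings.ggml_backend_cpu_init();
  if (backend == nullptr) {
    throw StateError('failed to initialize the ggml CPU backend');
  }
  // Arbitrary thread count for illustration.
  bindings.ggml_backend_cpu_set_n_threads(backend, 4);

  // ... allocate buffers / compute graphs against `backend` here ...

  bindings.ggml_backend_free(backend);
}
```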
ggml_backend_cpu_reg() ggml_backend_reg_t
ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, Pointer<Void> abort_callback_data) → void
ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) → void
ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) → void
ggml_backend_dev_backend_reg(ggml_backend_dev_t device) ggml_backend_reg_t
ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, Pointer<Void> ptr, int size, int max_tensor_size) ggml_backend_buffer_t
ggml_backend_dev_buffer_type(ggml_backend_dev_t device) ggml_backend_buffer_type_t
ggml_backend_dev_by_name(Pointer<Char> name) ggml_backend_dev_t
ggml_backend_dev_by_type(int type) ggml_backend_dev_t
ggml_backend_dev_count() int
Device enumeration
ggml_backend_dev_description(ggml_backend_dev_t device) Pointer<Char>
ggml_backend_dev_get(int index) ggml_backend_dev_t
ggml_backend_dev_get_props(ggml_backend_dev_t device, Pointer<ggml_backend_dev_props> props) → void
ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) ggml_backend_buffer_type_t
ggml_backend_dev_init(ggml_backend_dev_t device, Pointer<Char> params) ggml_backend_t
ggml_backend_dev_memory(ggml_backend_dev_t device, Pointer<Size> free, Pointer<Size> total) → void
ggml_backend_dev_name(ggml_backend_dev_t device) Pointer<Char>
ggml_backend_dev_offload_op(ggml_backend_dev_t device, Pointer<ggml_tensor> op) bool
ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) bool
ggml_backend_dev_supports_op(ggml_backend_dev_t device, Pointer<ggml_tensor> op) bool
ggml_backend_dev_type1(ggml_backend_dev_t device) int
ggml_backend_device_register(ggml_backend_dev_t device) → void
Backend registry
ggml_backend_event_free(ggml_backend_event_t event) → void
ggml_backend_event_new(ggml_backend_dev_t device) ggml_backend_event_t
Events
ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend) → void
ggml_backend_event_synchronize(ggml_backend_event_t event) → void
ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) → void
ggml_backend_free(ggml_backend_t backend) → void
ggml_backend_get_alignment(ggml_backend_t backend) int
ggml_backend_get_default_buffer_type(ggml_backend_t backend) ggml_backend_buffer_type_t
ggml_backend_get_device(ggml_backend_t backend) ggml_backend_dev_t
ggml_backend_get_max_size(ggml_backend_t backend) int
ggml_backend_graph_compute(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) int
ggml_backend_graph_compute_async(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) int
ggml_backend_graph_copy1(ggml_backend_t backend, Pointer<ggml_cgraph> graph) ggml_backend_graph_copy
Copy a graph to a different backend
ggml_backend_graph_copy_free(ggml_backend_graph_copy copy) → void
ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) int
ggml_backend_graph_plan_create(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) ggml_backend_graph_plan_t
ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) → void
ggml_backend_guid(ggml_backend_t backend) ggml_guid_t
Backend (stream)
ggml_backend_init_best() ggml_backend_t
= ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
ggml_backend_init_by_name(Pointer<Char> name, Pointer<Char> params) ggml_backend_t
Direct backend (stream) initialization = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
ggml_backend_init_by_type(int type, Pointer<Char> params) ggml_backend_t
= ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
ggml_backend_is_cpu(ggml_backend_t backend) bool
ggml_backend_load(Pointer<Char> path) ggml_backend_reg_t
Load a backend from a dynamic library and register it
ggml_backend_load_all() → void
Load all known backends from dynamic libraries
ggml_backend_load_all_from_path(Pointer<Char> dir_path) → void
ggml_backend_name(ggml_backend_t backend) Pointer<Char>
ggml_backend_offload_op(ggml_backend_t backend, Pointer<ggml_tensor> op) bool
ggml_backend_reg_by_name(Pointer<Char> name) ggml_backend_reg_t
ggml_backend_reg_count() int
Backend (reg) enumeration
ggml_backend_reg_dev_count(ggml_backend_reg_t reg) int
ggml_backend_reg_dev_get(ggml_backend_reg_t reg, int index) ggml_backend_dev_t
ggml_backend_reg_get(int index) ggml_backend_reg_t
ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, Pointer<Char> name) Pointer<Void>
ggml_backend_reg_name(ggml_backend_reg_t reg) Pointer<Char>
Backend (reg)
ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, Pointer<ggml_cgraph> graph) bool
Allocate and compute graph on the backend scheduler
ggml_backend_sched_free(ggml_backend_sched_t sched) → void
ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) ggml_backend_t
ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) int
ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) int
ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) int
ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) int
Get the number of splits of the last graph
ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, Pointer<ggml_tensor> node) ggml_backend_t
ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, Pointer<ggml_cgraph> graph) int
ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, Pointer<ggml_cgraph> graph) int
ggml_backend_sched_new(Pointer<ggml_backend_t> backends, Pointer<ggml_backend_buffer_type_t> bufts, int n_backends, int graph_size, bool parallel) ggml_backend_sched_t
Initialize a backend scheduler, backends with low index are given priority over backends with high index
ggml_backend_sched_reserve(ggml_backend_sched_t sched, Pointer<ggml_cgraph> measure_graph) bool
Initialize backend buffers from a measure graph
ggml_backend_sched_reset(ggml_backend_sched_t sched) → void
Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph. This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers. The correct way to use this API is to discard the deallocated tensors and create new ones.
ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, Pointer<Void> user_data) → void
Set a callback to be called for each resulting node during graph compute
ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, Pointer<ggml_tensor> node, ggml_backend_t backend) → void
ggml_backend_sched_synchronize(ggml_backend_sched_t sched) → void
ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) bool
ggml_backend_supports_op(ggml_backend_t backend, Pointer<ggml_tensor> op) bool
NOTE: will be removed, use device version instead
ggml_backend_synchronize(ggml_backend_t backend) → void
ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor, Pointer<Void> addr) → void
Tensor initialization
ggml_backend_tensor_copy(Pointer<ggml_tensor> src, Pointer<ggml_tensor> dst) → void
tensor copy between different backends
ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, Pointer<ggml_tensor> src, Pointer<ggml_tensor> dst) → void
asynchronous copy: the copy is performed after all the currently queued operations in backend_src; backend_dst will wait for the copy to complete before performing other operations; automatic fallback to sync copy if async is not supported
ggml_backend_tensor_get(Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_get_async(ggml_backend_t backend, Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_memset(Pointer<ggml_tensor> tensor, int value, int offset, int size) → void
ggml_backend_tensor_set(Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
"offset" refers to the offset in tensor->data for setting/getting data
ggml_backend_tensor_set_async(ggml_backend_t backend, Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
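Since offset is a byte offset into tensor->data, a round trip through ggml_backend_tensor_set / ggml_backend_tensor_get looks like the sketch below. It assumes `tensor` already lives in a backend buffer (for example via ggml_backend_alloc_buffer plus ggml_backend_tensor_alloc); allocation is omitted.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings and the ggml_tensor struct are imported.
void roundTripFloats(WhisperDartBindings bindings, Pointer<ggml_tensor> tensor,
    List<double> values) {
  final n = values.length;
  final byteCount = n * sizeOf<Float>();

  final src = calloc<Float>(n);
  for (var i = 0; i < n; i++) {
    src[i] = values[i];
  }
  // offset = 0: write starting at the beginning of tensor->data.
  bindings.ggml_backend_tensor_set(tensor, src.cast<Void>(), 0, byteCount);

  final dst = calloc<Float>(n);
  bindings.ggml_backend_tensor_get(tensor, dst.cast<Void>(), 0, byteCount);

  calloc.free(src);
  calloc.free(dst);
}
```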
ggml_backend_unload(ggml_backend_reg_t reg) → void
Unload a backend if loaded dynamically and unregister it
ggml_backend_view_init(Pointer<ggml_tensor> tensor) → void
ggml_bf16_to_fp32(ggml_bf16_t arg0) double
ggml_bf16_to_fp32_row(Pointer<ggml_bf16_t> arg0, Pointer<Float> arg1, int arg2) → void
ggml_blck_size(int type) int
ggml_build_backward_expand(Pointer<ggml_context> ctx_static, Pointer<ggml_context> ctx_compute, Pointer<ggml_cgraph> cgraph, bool accumulate) → void
ggml_build_forward_expand(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void
automatic differentiation
ggml_can_repeat(Pointer<ggml_tensor> t0, Pointer<ggml_tensor> t1) bool
ggml_cast(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int type) Pointer<ggml_tensor>
ggml_clamp(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double min, double max) Pointer<ggml_tensor>
clamp in-place, returns view(a)
ggml_concat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int dim) Pointer<ggml_tensor>
concat a and b along dim; used in stable-diffusion
ggml_cont(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
make contiguous
ggml_cont_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) Pointer<ggml_tensor>
make contiguous, with new shape
ggml_cont_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) Pointer<ggml_tensor>
ggml_cont_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) Pointer<ggml_tensor>
ggml_cont_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
ggml_conv_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) Pointer<ggml_tensor>
ggml_conv_1d_dw(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) Pointer<ggml_tensor>
depthwise TODO: this is very likely wrong for some cases! - needs more testing
ggml_conv_1d_dw_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int d0) Pointer<ggml_tensor>
ggml_conv_1d_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s, int d) Pointer<ggml_tensor>
conv_1d with padding = half; alias for ggml_conv_1d(a, b, s, a->ne0/2, d)
ggml_conv_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) Pointer<ggml_tensor>
ggml_conv_2d_dw(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) Pointer<ggml_tensor>
depthwise
ggml_conv_2d_s1_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
kernel size is a->ne0 x a->ne1, stride is 1, padding is half. Example: a: 3 3 256 256, b: 64 64 256 1, res: 64 64 256 1. Used in sam.
ggml_conv_2d_sk_p0(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
kernel size is a->ne0 x a->ne1, stride is equal to kernel size, padding is zero. Example: a: 16 16 3 768, b: 1024 1024 3 1, res: 64 64 768 1. Used in sam.
ggml_conv_transpose_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) Pointer<ggml_tensor>
ggml_conv_transpose_2d_p0(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int stride) Pointer<ggml_tensor>
ggml_cos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_cos_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_count_equal(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
count number of equal elements in a and b
ggml_cpu_get_sve_cnt() int
ggml_cpu_has_amx_int8() int
ggml_cpu_has_arm_fma() int
ggml_cpu_has_avx() int
ggml_cpu_has_avx2() int
ggml_cpu_has_avx512() int
ggml_cpu_has_avx512_bf16() int
ggml_cpu_has_avx512_vbmi() int
ggml_cpu_has_avx512_vnni() int
ggml_cpu_has_avx_vnni() int
ggml_cpu_has_dotprod() int
ggml_cpu_has_f16c() int
ggml_cpu_has_fma() int
ggml_cpu_has_fp16_va() int
ggml_cpu_has_llamafile() int
ggml_cpu_has_matmul_int8() int
ggml_cpu_has_neon() int
ARM
ggml_cpu_has_riscv_v() int
other
ggml_cpu_has_sse3() int
x86
ggml_cpu_has_ssse3() int
ggml_cpu_has_sve() int
ggml_cpu_has_vsx() int
ggml_cpu_has_wasm_simd() int
ggml_cpu_init() → void
ggml_cpy(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
a -> b, return view(b)
ggml_cross_entropy_loss(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
loss function
ggml_cross_entropy_loss_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) Pointer<ggml_tensor>
ggml_cycles() int
ggml_cycles_per_ms() int
ggml_diag(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_diag_mask_inf(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
set elements above the diagonal to -INF
ggml_diag_mask_inf_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
in-place, returns view(a)
ggml_diag_mask_zero(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
set elements above the diagonal to 0
ggml_diag_mask_zero_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
in-place, returns view(a)
ggml_div(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_div_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_dup(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
operations on tensors with backpropagation
ggml_dup_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
in-place, returns view(a)
ggml_dup_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> src) Pointer<ggml_tensor>
ggml_element_size(Pointer<ggml_tensor> tensor) int
ggml_elu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_elu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_exp(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_exp_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_flash_attn_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> d, bool masked) Pointer<ggml_tensor>
TODO: needs to be adapted to ggml_flash_attn_ext
ggml_flash_attn_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> mask, double scale, double max_bias, double logit_softcap) Pointer<ggml_tensor>
q: n_embd, n_batch, n_head, 1; k: n_embd, n_kv, n_head_kv, 1; v: n_embd, n_kv, n_head_kv, 1 !! not transposed !!; mask: n_kv, n_batch_pad, 1, 1 !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!; res: n_embd, n_head, n_batch, 1 !! permuted !!
ggml_flash_attn_ext_get_prec(Pointer<ggml_tensor> a) int
ggml_flash_attn_ext_set_prec(Pointer<ggml_tensor> a, int prec) → void
ggml_fopen(Pointer<Char> fname, Pointer<Char> mode) Pointer<FILE>
accepts a UTF-8 path, even on Windows
ggml_format_name(Pointer<ggml_tensor> tensor, Pointer<Char> fmt) Pointer<ggml_tensor>
ggml_fp16_to_fp32(int arg0) double
ggml_fp16_to_fp32_row(Pointer<ggml_fp16_t> arg0, Pointer<Float> arg1, int arg2) → void
ggml_fp32_to_bf16(double arg0) ggml_bf16_t
ggml_fp32_to_bf16_row(Pointer<Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void
ggml_fp32_to_bf16_row_ref(Pointer<Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void
ggml_fp32_to_fp16(double arg0) int
ggml_fp32_to_fp16_row(Pointer<Float> arg0, Pointer<ggml_fp16_t> arg1, int arg2) → void
ggml_free(Pointer<ggml_context> ctx) → void
ggml_ftype_to_ggml_type(int ftype) int
TODO: temporary until model loading of ggml examples is refactored
ggml_gated_linear_attn(Pointer<ggml_context> ctx, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> q, Pointer<ggml_tensor> g, Pointer<ggml_tensor> state, double scale) Pointer<ggml_tensor>
ggml_gelu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_quick(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_quick_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_get_data(Pointer<ggml_tensor> tensor) Pointer<Void>
ggml_get_data_f32(Pointer<ggml_tensor> tensor) Pointer<Float>
ggml_get_f32_1d(Pointer<ggml_tensor> tensor, int i) double
ggml_get_f32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3) double
ggml_get_first_tensor(Pointer<ggml_context> ctx) Pointer<ggml_tensor>
Context tensor enumeration and lookup
ggml_get_i32_1d(Pointer<ggml_tensor> tensor, int i) int
ggml_get_i32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3) int
ggml_get_max_tensor_size(Pointer<ggml_context> ctx) int
ggml_get_mem_buffer(Pointer<ggml_context> ctx) Pointer<Void>
ggml_get_mem_size(Pointer<ggml_context> ctx) int
ggml_get_name(Pointer<ggml_tensor> tensor) Pointer<Char>
ggml_get_next_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> tensor) Pointer<ggml_tensor>
ggml_get_no_alloc(Pointer<ggml_context> ctx) bool
ggml_get_rel_pos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int qh, int kh) Pointer<ggml_tensor>
used in sam
ggml_get_rows(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
supports 3D: a->ne2 == b->ne1
ggml_get_rows_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) Pointer<ggml_tensor>
ggml_get_tensor(Pointer<ggml_context> ctx, Pointer<Char> name) Pointer<ggml_tensor>
ggml_get_type_traits(int type) Pointer<ggml_type_traits>
ggml_get_type_traits_cpu(int type) Pointer<ggml_type_traits_cpu>
ggml_get_unary_op(Pointer<ggml_tensor> tensor) int
ggml_graph_add_node(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void
ggml_graph_clear(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_compute(Pointer<ggml_cgraph> cgraph, Pointer<ggml_cplan> cplan) int
ggml_graph_compute_with_ctx(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph, int n_threads) int
same as ggml_graph_compute() but the work data is allocated as a part of the context. Note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
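A small end-to-end sketch of that workflow: one context holds the tensors, the graph, and the work data. The 16 MB arena, the thread count, and the hard-coded 0 for GGML_TYPE_F32 are illustrative assumptions; the ggml_init_params field names follow ggml.h.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings (WhisperDartBindings, ggml_init_params) are imported.
void sqrtGraphSketch(WhisperDartBindings bindings) {
  bindings.ggml_cpu_init();

  final params = calloc<ggml_init_params>();
  params.ref.mem_size = 16 * 1024 * 1024; // tensors + graph + work data
  params.ref.mem_buffer = nullptr;
  params.ref.no_alloc = false;
  final ctx = bindings.ggml_init(params.ref);

  final a = bindings.ggml_new_tensor_1d(ctx, 0 /* GGML_TYPE_F32 */, 4);
  for (var i = 0; i < 4; i++) {
    bindings.ggml_set_f32_1d(a, i, (i + 1).toDouble());
  }

  final b = bindings.ggml_sqrt(ctx, a);
  final graph = bindings.ggml_new_graph(ctx);
  bindings.ggml_build_forward_expand(graph, b);

  // Work data is carved out of ctx, so mem_size above must cover it.
  bindings.ggml_graph_compute_with_ctx(ctx, graph, 2);

  for (var i = 0; i < 4; i++) {
    print(bindings.ggml_get_f32_1d(b, i)); // 1.0, 1.41..., 1.73..., 2.0
  }

  bindings.ggml_free(ctx);
  calloc.free(params);
}
```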
ggml_graph_cpy(Pointer<ggml_cgraph> src, Pointer<ggml_cgraph> dst) → void
ggml_graph_dump_dot(Pointer<ggml_cgraph> gb, Pointer<ggml_cgraph> gf, Pointer<Char> filename) → void
dump the graph into a file using the dot format
ggml_graph_dup(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph) Pointer<ggml_cgraph>
ggml_graph_export(Pointer<ggml_cgraph> cgraph, Pointer<Char> fname) → void
ggml_graph_get_grad(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> node) Pointer<ggml_tensor>
ggml_graph_get_grad_acc(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> node) Pointer<ggml_tensor>
ggml_graph_get_tensor(Pointer<ggml_cgraph> cgraph, Pointer<Char> name) Pointer<ggml_tensor>
ggml_graph_import(Pointer<Char> fname, Pointer<Pointer<ggml_context>> ctx_data, Pointer<Pointer<ggml_context>> ctx_eval) Pointer<ggml_cgraph>
ggml_graph_n_nodes(Pointer<ggml_cgraph> cgraph) int
ggml_graph_node(Pointer<ggml_cgraph> cgraph, int i) Pointer<ggml_tensor>
ggml_graph_nodes(Pointer<ggml_cgraph> cgraph) Pointer<Pointer<ggml_tensor>>
ggml_graph_overhead() int
ggml_graph_overhead_custom(int size, bool grads) int
ggml_graph_plan(Pointer<ggml_cgraph> cgraph, int n_threads, Pointer<ggml_threadpool> threadpool) ggml_cplan
ggml_graph_plan() has to be called before ggml_graph_compute(). When plan.work_size > 0, the caller must allocate memory for plan.work_data
ggml_graph_print(Pointer<ggml_cgraph> cgraph) → void
print info and performance information for the graph
ggml_graph_reset(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_size(Pointer<ggml_cgraph> cgraph) int
ggml_group_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) Pointer<ggml_tensor>
group normalize along ne0*ne1*n_groups; used in stable-diffusion
ggml_group_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) Pointer<ggml_tensor>
ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) bool
ggml_hardsigmoid(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
hardsigmoid(x) = relu6(x + 3) / 6
ggml_hardswish(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
hardswish(x) = x * relu6(x + 3) / 6
ggml_im2col(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D, int dst_type) Pointer<ggml_tensor>
im2col converts data into a format that effectively results in a convolution when combined with matrix multiplication
ggml_im2col_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<Int64> ne, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D) Pointer<ggml_tensor>
ggml_init(ggml_init_params params) Pointer<ggml_context>
main
ggml_is_3d(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous_0(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous_1(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous_2(Pointer<ggml_tensor> tensor) bool
ggml_is_empty(Pointer<ggml_tensor> tensor) bool
ggml_is_matrix(Pointer<ggml_tensor> tensor) bool
ggml_is_numa() bool
ggml_is_permuted(Pointer<ggml_tensor> tensor) bool
ggml_is_quantized(int type) bool
ggml_is_scalar(Pointer<ggml_tensor> tensor) bool
ggml_is_transposed(Pointer<ggml_tensor> tensor) bool
ggml_is_vector(Pointer<ggml_tensor> tensor) bool
ggml_leaky_relu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double negative_slope, bool inplace) Pointer<ggml_tensor>
ggml_log(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_log_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_log_set(ggml_log_callback log_callback, Pointer<Void> user_data) → void
Set callback for all future logging events. If this is not called, or NULL is supplied, everything is output on stderr.
ggml_map_binary_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_binary_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_custom1(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
ggml_map_custom1_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_custom1_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom1_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_custom2(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom2_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_custom2_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom2_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_custom3(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom3_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_custom3_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom3_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_unary_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) Pointer<ggml_tensor>
ggml_map_unary_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) Pointer<ggml_tensor>
ggml_mean(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
mean along rows
ggml_mul(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_mul_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_mul_mat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
A: k columns, n rows => [ne03, ne02, n, k]; B: k columns, m rows (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]; result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
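The shape convention above can be checked without computing anything, since ggml derives result shapes at graph-build time. The no_alloc context and the hard-coded 0 for GGML_TYPE_F32 are assumptions for illustration.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings and structs (ggml_init_params, ggml_tensor) are imported.
void mulMatShapeSketch(WhisperDartBindings bindings) {
  final params = calloc<ggml_init_params>();
  params.ref.mem_size = 1024 * 1024;
  params.ref.mem_buffer = nullptr;
  params.ref.no_alloc = true; // only tensor metadata, no data buffers
  final ctx = bindings.ggml_init(params.ref);

  const k = 8, n = 3, m = 5;
  final a = bindings.ggml_new_tensor_2d(ctx, 0 /* GGML_TYPE_F32 */, k, n); // ne = [k, n, 1, 1]
  final b = bindings.ggml_new_tensor_2d(ctx, 0 /* GGML_TYPE_F32 */, k, m); // ne = [k, m, 1, 1]
  final c = bindings.ggml_mul_mat(ctx, a, b);

  // Prints "3 x 5": n columns, m rows, matching the convention above.
  print('${c.ref.ne[0]} x ${c.ref.ne[1]}');

  bindings.ggml_free(ctx);
  calloc.free(params);
}
```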
ggml_mul_mat_id(Pointer<ggml_context> ctx, Pointer<ggml_tensor> as1, Pointer<ggml_tensor> b, Pointer<ggml_tensor> ids) Pointer<ggml_tensor>
indirect matrix multiplication
ggml_mul_mat_set_prec(Pointer<ggml_tensor> a, int prec) → void
change the precision of a matrix multiplication; set to GGML_PREC_F32 for higher precision (useful for phi-2)
ggml_n_dims(Pointer<ggml_tensor> tensor) int
ggml_nbytes(Pointer<ggml_tensor> tensor) int
ggml_nbytes_pad(Pointer<ggml_tensor> tensor) int
ggml_neg(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_neg_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_nelements(Pointer<ggml_tensor> tensor) int
ggml_new_buffer(Pointer<ggml_context> ctx, int nbytes) Pointer<Void>
ggml_new_f32(Pointer<ggml_context> ctx, double value) Pointer<ggml_tensor>
ggml_new_graph(Pointer<ggml_context> ctx) Pointer<ggml_cgraph>
graph allocation in a context
ggml_new_graph_custom(Pointer<ggml_context> ctx, int size, bool grads) Pointer<ggml_cgraph>
ggml_new_i32(Pointer<ggml_context> ctx, int value) Pointer<ggml_tensor>
ggml_new_tensor(Pointer<ggml_context> ctx, int type, int n_dims, Pointer<Int64> ne) Pointer<ggml_tensor>
ggml_new_tensor_1d(Pointer<ggml_context> ctx, int type, int ne0) Pointer<ggml_tensor>
ggml_new_tensor_2d(Pointer<ggml_context> ctx, int type, int ne0, int ne1) Pointer<ggml_tensor>
ggml_new_tensor_3d(Pointer<ggml_context> ctx, int type, int ne0, int ne1, int ne2) Pointer<ggml_tensor>
ggml_new_tensor_4d(Pointer<ggml_context> ctx, int type, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
ggml_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
normalize along rows
ggml_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
ggml_nrows(Pointer<ggml_tensor> tensor) int
ggml_numa_init(int numa) → void
ggml_op_desc(Pointer<ggml_tensor> t) Pointer<Char>
ggml_op_name(int op) Pointer<Char>
ggml_op_symbol(int op) Pointer<Char>
ggml_opt_step_adamw(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> grad, Pointer<ggml_tensor> m, Pointer<ggml_tensor> v, Pointer<ggml_tensor> adamw_params) Pointer<ggml_tensor>
AdamW optimizer step. Paper: https://arxiv.org/pdf/1711.05101v3.pdf PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
ggml_out_prod(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
A: m columns, n rows, B: p columns, n rows, result is m columns, p rows
ggml_pad(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1, int p2, int p3) Pointer<ggml_tensor>
pad each dimension with zeros: x, ..., x -> x, ..., x, 0, ..., 0
ggml_pad_reflect_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1) Pointer<ggml_tensor>
pad each dimension with reflection: a, b, c, d -> b, a, b, c, d, c
ggml_permute(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int axis0, int axis1, int axis2, int axis3) Pointer<ggml_tensor>
ggml_pool_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op, int k0, int s0, int p0) Pointer<ggml_tensor>
ggml_pool_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op, int k0, int k1, int s0, int s1, double p0, double p1) Pointer<ggml_tensor>
the result will have 2*p0 padding for the first dimension and 2*p1 padding for the second dimension
ggml_pool_2d_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> af, int op, int k0, int k1, int s0, int s1, double p0, double p1) Pointer<ggml_tensor>
ggml_print_object(Pointer<ggml_object> obj) → void
ggml_print_objects(Pointer<ggml_context> ctx) → void
ggml_quantize_chunk(int type, Pointer<Float> src, Pointer<Void> dst, int start, int nrows, int n_per_row, Pointer<Float> imatrix) int
calls ggml_quantize_init internally (i.e. can allocate memory)
ggml_quantize_free() → void
ggml_quantize_init(int type) → void
  • ggml_quantize_init can be called multiple times with the same type; it will only initialize the quantization tables for the first call or after ggml_quantize_free. Automatically called by ggml_quantize_chunk for convenience.

  • ggml_quantize_free will free any memory allocated by ggml_quantize_init. Call this at the end of the program to avoid memory leaks.

ggml_quantize_requires_imatrix(int type) bool
some quantization types cannot be used without an importance matrix
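Putting the quantization helpers together: ggml_row_size sizes the destination buffer and ggml_quantize_chunk does the conversion (initializing the tables itself). The type value 2 for GGML_TYPE_Q4_0 and the row length are assumptions; prefer the enum constants exposed by the generated bindings if they are available.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings are imported.
void quantizeRowSketch(WhisperDartBindings bindings) {
  const type = 2; // GGML_TYPE_Q4_0 (assumed enum value)
  const nPerRow = 256; // must be a multiple of the block size of the chosen type
  const nRows = 1;

  final src = calloc<Float>(nPerRow * nRows);
  for (var i = 0; i < nPerRow; i++) {
    src[i] = i / nPerRow;
  }

  final rowSize = bindings.ggml_row_size(type, nPerRow);
  final dst = calloc<Uint8>(rowSize * nRows);

  // No importance matrix for Q4_0, so imatrix may be null.
  final written = bindings.ggml_quantize_chunk(
      type, src, dst.cast<Void>(), 0, nRows, nPerRow, nullptr);
  print('quantized $written bytes (row size $rowSize)');

  bindings.ggml_quantize_free();
  calloc.free(src);
  calloc.free(dst);
}
```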
    ggml_relu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_relu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_repeat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
    if a is the same shape as b, and a is not a parameter, return a; otherwise, return a new tensor: repeat(a) to fit in b
    ggml_repeat_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
    sums repetitions in a into shape of b
    ggml_reset(Pointer<ggml_context> ctx) → void
    ggml_reshape(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
    return view(a), b specifies the new shape TODO: when we start computing gradient, make a copy instead of view
    ggml_reshape_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) Pointer<ggml_tensor>
    return view(a) TODO: when we start computing gradient, make a copy instead of view
    ggml_reshape_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) Pointer<ggml_tensor>
    ggml_reshape_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) Pointer<ggml_tensor>
    return view(a) TODO: when we start computing gradient, make a copy instead of view
    ggml_reshape_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
    ggml_rms_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
    ggml_rms_norm_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double eps) Pointer<ggml_tensor>
    a - x, b - dy
    ggml_rms_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
    ggml_rope(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) Pointer<ggml_tensor>
    rotary position embedding; if (mode & 1) - skip n_past elements (NOT SUPPORTED); if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
    ggml_rope_custom(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
    ggml_rope_custom_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
    ggml_rope_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
    custom RoPE; c is freq factors (e.g. phi3-128k), optional
    ggml_rope_ext_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
    rotary position embedding backward, i.e. compute dx from dy; a - dy
    ggml_rope_ext_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
    in-place, returns view(a)
    ggml_rope_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) Pointer<ggml_tensor>
    in-place, returns view(a)
    ggml_rope_multi(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, Pointer<Int> sections, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
    ggml_rope_multi_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, Pointer<Int> sections, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
    ggml_rope_yarn_corr_dims(int n_dims, int n_ctx_orig, double freq_base, double beta_fast, double beta_slow, Pointer<Float> dims) → void
    compute correction dims for YaRN RoPE scaling
    ggml_row_size(int type, int ne) int
    ggml_rwkv_wkv6(Pointer<ggml_context> ctx, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> r, Pointer<ggml_tensor> tf, Pointer<ggml_tensor> td, Pointer<ggml_tensor> state) Pointer<ggml_tensor>
    ggml_scale(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double s) Pointer<ggml_tensor>
    operations on tensors without backpropagation
    ggml_scale_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double s) Pointer<ggml_tensor>
    in-place, returns view(a)
    ggml_set(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
    b -> view(a, offset, nb1, nb2, nb3), return modified a
    ggml_set_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) Pointer<ggml_tensor>
    ggml_set_1d_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) Pointer<ggml_tensor>
    ggml_set_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) Pointer<ggml_tensor>
    b -> view(a, offset, nb1, nb2, nb3), return modified a
    ggml_set_2d_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) Pointer<ggml_tensor>
    b -> view(a, offset, nb1, nb2, nb3), return view(a)
    ggml_set_f32(Pointer<ggml_tensor> tensor, double value) Pointer<ggml_tensor>
    ggml_set_f32_1d(Pointer<ggml_tensor> tensor, int i, double value) → void
    ggml_set_f32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3, double value) → void
    ggml_set_i32(Pointer<ggml_tensor> tensor, int value) Pointer<ggml_tensor>
    ggml_set_i32_1d(Pointer<ggml_tensor> tensor, int i, int value) → void
    ggml_set_i32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3, int value) → void
    ggml_set_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
    b -> view(a, offset, nb1, nb2, nb3), return view(a)
    ggml_set_input(Pointer<ggml_tensor> tensor) → void
    Tensor flags
    ggml_set_loss(Pointer<ggml_tensor> tensor) → void
    ggml_set_name(Pointer<ggml_tensor> tensor, Pointer<Char> name) Pointer<ggml_tensor>
    ggml_set_no_alloc(Pointer<ggml_context> ctx, bool no_alloc) → void
    ggml_set_output(Pointer<ggml_tensor> tensor) → void
    ggml_set_param(Pointer<ggml_context> ctx, Pointer<ggml_tensor> tensor) → void
    ggml_set_zero(Pointer<ggml_tensor> tensor) Pointer<ggml_tensor>
    ggml_sgn(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sgn_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sigmoid(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sigmoid_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_silu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_silu_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
    a - x, b - dy
    ggml_silu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sin(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sin_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_soft_max(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_soft_max_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> mask, double scale, double max_bias) Pointer<ggml_tensor>
    fused soft_max(a*scale + mask*(ALiBi slope)); mask is optional; max_bias = 0.0f for no ALiBi
    ggml_soft_max_ext_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double scale, double max_bias) Pointer<ggml_tensor>
    ggml_soft_max_ext_back_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double scale, double max_bias) Pointer<ggml_tensor>
    in-place, returns view(a)
    ggml_soft_max_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    in-place, returns view(a)
    ggml_sqr(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sqr_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sqrt(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sqrt_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_ssm_conv(Pointer<ggml_context> ctx, Pointer<ggml_tensor> sx, Pointer<ggml_tensor> c) Pointer<ggml_tensor>
    ggml_ssm_scan(Pointer<ggml_context> ctx, Pointer<ggml_tensor> s, Pointer<ggml_tensor> x, Pointer<ggml_tensor> dt, Pointer<ggml_tensor> A, Pointer<ggml_tensor> B, Pointer<ggml_tensor> C) Pointer<ggml_tensor>
    ggml_status_to_string(int status) Pointer<Char>
    get ggml_status name string
    ggml_step(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_step_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_sub(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
    ggml_sub_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
    ggml_sum(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    return scalar
    ggml_sum_rows(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    sums along rows, with input shape [a,b,c,d], return shape [1,b,c,d]
    ggml_tanh(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_tanh_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    ggml_tensor_overhead() int
    use this to compute the memory overhead of a tensor
    ggml_threadpool_free(Pointer<ggml_threadpool> threadpool) → void
    ggml_threadpool_get_n_threads(Pointer<ggml_threadpool> threadpool) int
    ggml_threadpool_new(Pointer<ggml_threadpool_params> params) Pointer<ggml_threadpool>
    ggml_threadpool_params_default(int n_threads) ggml_threadpool_params
    ggml_threadpool_params_init(Pointer<ggml_threadpool_params> p, int n_threads) → void
    ggml_threadpool_params_match(Pointer<ggml_threadpool_params> p0, Pointer<ggml_threadpool_params> p1) bool
    ggml_threadpool_pause(Pointer<ggml_threadpool> threadpool) → void
    ggml_threadpool_resume(Pointer<ggml_threadpool> threadpool) → void
    ggml_time_init() → void
    misc
    ggml_time_ms() int
    ggml_time_us() int
    ggml_timestep_embedding(Pointer<ggml_context> ctx, Pointer<ggml_tensor> timesteps, int dim, int max_period) Pointer<ggml_tensor>
    Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151 timesteps: N, return: N, dim
    ggml_top_k(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int k) Pointer<ggml_tensor>
    top k elements per row
    ggml_transpose(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
    alias for ggml_permute(ctx, a, 1, 0, 2, 3)
    ggml_type_name(int type) Pointer<Char>
    ggml_type_size(int type) int
    ggml_type_sizef(int type) double
    ggml_unary(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op) Pointer<ggml_tensor>
    ggml_unary_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op) Pointer<ggml_tensor>
    ggml_unary_op_name(int op) Pointer<Char>
    ggml_unravel_index(Pointer<ggml_tensor> tensor, int i, Pointer<Int64> i0, Pointer<Int64> i1, Pointer<Int64> i2, Pointer<Int64> i3) → void
    Converts a flat index into coordinates
    ggml_upscale(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int scale_factor) Pointer<ggml_tensor>
    nearest interpolate; multiplies ne0 and ne1 by scale factor; used in stable-diffusion
    ggml_upscale_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
    nearest interpolate to specified dimensions; used in tortoise.cpp
    ggml_used_mem(Pointer<ggml_context> ctx) int
    ggml_validate_row_data(int type, Pointer<Void> data, int nbytes) bool
    ggml_view_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int offset) Pointer<ggml_tensor>
    offset in bytes
    ggml_view_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int nb1, int offset) Pointer<ggml_tensor>
    ggml_view_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int nb1, int nb2, int offset) Pointer<ggml_tensor>
    ggml_view_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
    ggml_view_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> src) Pointer<ggml_tensor>
    ggml_win_part(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int w) Pointer<ggml_tensor>
    partition into non-overlapping windows with padding if needed. Example: a: 768 64 64 1, w: 14, res: 768 14 14 25. Used in sam.
    ggml_win_unpart(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int w0, int h0, int w) Pointer<ggml_tensor>
    reverse of ggml_win_part; used in sam
    noSuchMethod(Invocation invocation) → dynamic
    Invoked when a nonexistent method or property is accessed.
    inherited
    toString() String
    A string representation of this object.
    inherited
    whisper_bench_ggml_mul_mat(int n_threads) int
    whisper_bench_ggml_mul_mat_str(int n_threads) Pointer<Char>
    whisper_bench_memcpy(int n_threads) int
    Temporary helpers needed for exposing ggml interface
    whisper_bench_memcpy_str(int n_threads) Pointer<Char>
    whisper_context_default_params() whisper_context_params
    whisper_context_default_params_by_ref() Pointer<whisper_context_params>
    NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
    whisper_ctx_init_openvino_encoder(Pointer<whisper_context> ctx, Pointer<Char> model_path, Pointer<Char> device, Pointer<Char> cache_dir) int
    whisper_ctx_init_openvino_encoder_with_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, Pointer<Char> model_path, Pointer<Char> device, Pointer<Char> cache_dir) int
    Given a context, enable use of OpenVINO for encode inference. model_path: Optional path to OpenVINO encoder IR model. If set to nullptr, the path will be generated from the ggml model path that was passed in to whisper_init_from_file. For example, if 'path_model' was "/path/to/ggml-base.en.bin", then OpenVINO IR model path will be assumed to be "/path/to/ggml-base.en-encoder-openvino.xml". device: OpenVINO device to run inference on ("CPU", "GPU", etc.) cache_dir: Optional cache directory that can speed up init time, especially for GPU, by caching compiled 'blobs' there. Set to nullptr if not used. Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
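A sketch of the call as described above, deriving the OpenVINO IR path from the ggml model path by passing nullptr and targeting the CPU device; the device string and the missing cache_dir are illustrative choices.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings and whisper_context are imported.
void enableOpenVinoEncoder(
    WhisperDartBindings bindings, Pointer<whisper_context> ctx) {
  final device = 'CPU'.toNativeUtf8().cast<Char>();
  // model_path = nullptr: derive the OpenVINO IR path from the ggml model path.
  // cache_dir = nullptr: no blob cache.
  final rc = bindings.whisper_ctx_init_openvino_encoder(
      ctx, nullptr, device, nullptr);
  malloc.free(device);
  if (rc != 0) {
    // Per the note above, 1 means OpenVINO support was not compiled in.
    print('OpenVINO encoder not enabled (rc=$rc)');
  }
}
```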
    whisper_decode(Pointer<whisper_context> ctx, Pointer<whisper_token> tokens, int n_tokens, int n_past, int n_threads) int
    Run the Whisper decoder to obtain the logits and probabilities for the next token. Make sure to call whisper_encode() first. tokens + n_tokens is the provided context for the decoder. n_past is the number of tokens to use from previous decoder calls. Returns 0 on success TODO: add support for multiple decoders
    whisper_decode_with_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, Pointer<whisper_token> tokens, int n_tokens, int n_past, int n_threads) int
    whisper_encode(Pointer<whisper_context> ctx, int offset, int n_threads) int
    Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context. Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. offset can be used to specify the offset of the first frame in the spectrogram. Returns 0 on success
    whisper_encode_with_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, int offset, int n_threads) int
    whisper_free(Pointer<whisper_context> ctx) → void
    Frees all allocated memory
    whisper_free_context_params(Pointer<whisper_context_params> params) → void
    whisper_free_params(Pointer<whisper_full_params> params) → void
    whisper_free_state(Pointer<whisper_state> state) → void
    whisper_full(Pointer<whisper_context> ctx, whisper_full_params params, Pointer<Float> samples, int n_samples) int
    Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text. Not thread safe for the same context. Uses the specified decoding strategy to obtain the text.
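The usual transcription flow built from the calls in this listing: 16 kHz mono f32 PCM goes in, segment texts come out. The model path and the strategy value 0 (assumed to be the greedy sampling strategy) are placeholders.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings and whisper_* structs are imported.
void transcribeSketch(WhisperDartBindings bindings, List<double> pcm) {
  final modelPath = 'ggml-base.en.bin'.toNativeUtf8().cast<Char>(); // placeholder path
  final cparams = bindings.whisper_context_default_params();
  final ctx = bindings.whisper_init_from_file_with_params(modelPath, cparams);
  malloc.free(modelPath);
  if (ctx == nullptr) {
    throw StateError('failed to load the whisper model');
  }

  final samples = calloc<Float>(pcm.length);
  for (var i = 0; i < pcm.length; i++) {
    samples[i] = pcm[i];
  }

  final fparams = bindings.whisper_full_default_params(0 /* assumed: greedy strategy */);
  if (bindings.whisper_full(ctx, fparams, samples, pcm.length) != 0) {
    throw StateError('whisper_full failed');
  }

  final nSegments = bindings.whisper_full_n_segments(ctx);
  for (var i = 0; i < nSegments; i++) {
    final text = bindings
        .whisper_full_get_segment_text(ctx, i)
        .cast<Utf8>()
        .toDartString();
    final t0 = bindings.whisper_full_get_segment_t0(ctx, i); // timestamps in 10 ms ticks
    final t1 = bindings.whisper_full_get_segment_t1(ctx, i);
    print('[$t0 -> $t1] $text');
  }

  calloc.free(samples);
  bindings.whisper_free(ctx);
}
```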
    whisper_full_default_params(int strategy) whisper_full_params
    whisper_full_default_params_by_ref(int strategy) Pointer<whisper_full_params>
    whisper_full_get_segment_no_speech_prob(Pointer<whisper_context> ctx, int i_segment) double
    Get the no_speech probability for the specified segment
    whisper_full_get_segment_no_speech_prob_from_state(Pointer<whisper_state> state, int i_segment) double
    whisper_full_get_segment_speaker_turn_next(Pointer<whisper_context> ctx, int i_segment) bool
    Get whether the next segment is predicted as a speaker turn
    whisper_full_get_segment_speaker_turn_next_from_state(Pointer<whisper_state> state, int i_segment) bool
    whisper_full_get_segment_t0(Pointer<whisper_context> ctx, int i_segment) int
    Get the start and end time of the specified segment
    whisper_full_get_segment_t0_from_state(Pointer<whisper_state> state, int i_segment) int
    whisper_full_get_segment_t1(Pointer<whisper_context> ctx, int i_segment) int
    whisper_full_get_segment_t1_from_state(Pointer<whisper_state> state, int i_segment) int
    whisper_full_get_segment_text(Pointer<whisper_context> ctx, int i_segment) Pointer<Char>
    Get the text of the specified segment
    whisper_full_get_segment_text_from_state(Pointer<whisper_state> state, int i_segment) Pointer<Char>
    whisper_full_get_token_data(Pointer<whisper_context> ctx, int i_segment, int i_token) whisper_token_data
    Get token data for the specified token in the specified segment This contains probabilities, timestamps, etc.
    whisper_full_get_token_data_from_state(Pointer<whisper_state> state, int i_segment, int i_token) whisper_token_data
    whisper_full_get_token_id(Pointer<whisper_context> ctx, int i_segment, int i_token) int
    whisper_full_get_token_id_from_state(Pointer<whisper_state> state, int i_segment, int i_token) int
    whisper_full_get_token_p(Pointer<whisper_context> ctx, int i_segment, int i_token) double
    Get the probability of the specified token in the specified segment
    whisper_full_get_token_p_from_state(Pointer<whisper_state> state, int i_segment, int i_token) double
    whisper_full_get_token_text(Pointer<whisper_context> ctx, int i_segment, int i_token) Pointer<Char>
    Get the token text of the specified token in the specified segment
    whisper_full_get_token_text_from_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, int i_segment, int i_token) Pointer<Char>
    whisper_full_lang_id(Pointer<whisper_context> ctx) int
    Language id associated with the context's default state
    whisper_full_lang_id_from_state(Pointer<whisper_state> state) int
    Language id associated with the provided state
    whisper_full_n_segments(Pointer<whisper_context> ctx) int
    Number of generated text segments. A segment can be a few words, a sentence, or even a paragraph.
    whisper_full_n_segments_from_state(Pointer<whisper_state> state) int
    whisper_full_n_tokens(Pointer<whisper_context> ctx, int i_segment) int
    Get number of tokens in the specified segment
    whisper_full_n_tokens_from_state(Pointer<whisper_state> state, int i_segment) int
    whisper_full_parallel(Pointer<whisper_context> ctx, whisper_full_params params, Pointer<Float> samples, int n_samples, int n_processors) int
    Split the input audio into chunks and process each chunk separately using whisper_full_with_state(). The result is stored in the default state of the context. Not thread safe if executed in parallel on the same context. It seems this approach can offer some speedup in some cases. However, the transcription accuracy can be worse at the beginning and end of each chunk.
    whisper_full_with_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, whisper_full_params params, Pointer<Float> samples, int n_samples) int
    whisper_get_logits(Pointer<whisper_context> ctx) Pointer<Float>
    Token logits obtained from the last call to whisper_decode(). The logits for the last token are stored in the last row. Rows: n_tokens; Cols: n_vocab.
    whisper_get_logits_from_state(Pointer<whisper_state> state) Pointer<Float>
    whisper_get_timings(Pointer<whisper_context> ctx) Pointer<whisper_timings>
    whisper_init(Pointer<whisper_model_loader> loader) Pointer<whisper_context>
    whisper_init_from_buffer(Pointer<Void> buffer, int buffer_size) Pointer<whisper_context>
    whisper_init_from_buffer_no_state(Pointer<Void> buffer, int buffer_size) Pointer<whisper_context>
    whisper_init_from_buffer_with_params(Pointer<Void> buffer, int buffer_size, whisper_context_params params) Pointer<whisper_context>
    whisper_init_from_buffer_with_params_no_state(Pointer<Void> buffer, int buffer_size, whisper_context_params params) Pointer<whisper_context>
    whisper_init_from_file(Pointer<Char> path_model) Pointer<whisper_context>
    whisper_init_from_file_no_state(Pointer<Char> path_model) Pointer<whisper_context>
    whisper_init_from_file_with_params(Pointer<Char> path_model, whisper_context_params params) Pointer<whisper_context>
    Various functions for loading a ggml whisper model. Allocate (almost) all memory needed for the model. Return NULL on failure
    whisper_init_from_file_with_params_no_state(Pointer<Char> path_model, whisper_context_params params) Pointer<whisper_context>
    These are the same as the above, but the internal state of the context is not allocated automatically. It is the responsibility of the caller to allocate the state using whisper_init_state() (#523).
    whisper_init_no_state(Pointer<whisper_model_loader> loader) Pointer<whisper_context>
    whisper_init_state(Pointer<whisper_context> ctx) Pointer<whisper_state>
    whisper_init_with_params(Pointer<whisper_model_loader> loader, whisper_context_params params) Pointer<whisper_context>
    whisper_init_with_params_no_state(Pointer<whisper_model_loader> loader, whisper_context_params params) Pointer<whisper_context>
    whisper_is_multilingual(Pointer<whisper_context> ctx) int
    whisper_lang_auto_detect(Pointer<whisper_context> ctx, int offset_ms, int n_threads, Pointer<Float> lang_probs) int
    Use mel data at offset_ms to try to auto-detect the spoken language. Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. Returns the top language id, or a negative value on failure. If lang_probs is not null, it is filled with the probabilities of all languages; the array must be whisper_lang_max_id() + 1 in size. Ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
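A sketch of language detection against the context's default state, with the probability array sized per the note above (whisper_lang_max_id() + 1). It assumes whisper_pcm_to_mel() has already been called; offset_ms = 0 and 4 threads are arbitrary.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings and whisper_context are imported.
void detectLanguageSketch(
    WhisperDartBindings bindings, Pointer<whisper_context> ctx) {
  final nLangs = bindings.whisper_lang_max_id() + 1;
  final probs = calloc<Float>(nLangs);

  final langId = bindings.whisper_lang_auto_detect(ctx, 0, 4, probs);
  if (langId >= 0) {
    final code =
        bindings.whisper_lang_str(langId).cast<Utf8>().toDartString();
    print('detected "$code" (p = ${probs[langId]})');
  }

  calloc.free(probs);
}
```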
    whisper_lang_auto_detect_with_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, int offset_ms, int n_threads, Pointer<Float> lang_probs) int
    whisper_lang_id(Pointer<Char> lang) int
    Return the id of the specified language; returns -1 if not found. Examples: "de" -> 2, "german" -> 2
    whisper_lang_max_id() int
    Largest language id (i.e. number of available languages - 1)
    whisper_lang_str(int id) Pointer<Char>
    Return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found
    whisper_lang_str_full(int id) Pointer<Char>
    Return the short string of the specified language name (e.g. 2 -> "german"), returns nullptr if not found
    whisper_log_set(int log_callback, Pointer<Void> user_data) → void
    Control logging output; default behavior is to print to stderr
    whisper_model_ftype(Pointer<whisper_context> ctx) int
    whisper_model_n_audio_ctx(Pointer<whisper_context> ctx) int
    whisper_model_n_audio_head(Pointer<whisper_context> ctx) int
    whisper_model_n_audio_layer(Pointer<whisper_context> ctx) int
    whisper_model_n_audio_state(Pointer<whisper_context> ctx) int
    whisper_model_n_mels(Pointer<whisper_context> ctx) int
    whisper_model_n_text_ctx(Pointer<whisper_context> ctx) int
    whisper_model_n_text_head(Pointer<whisper_context> ctx) int
    whisper_model_n_text_layer(Pointer<whisper_context> ctx) int
    whisper_model_n_text_state(Pointer<whisper_context> ctx) int
    whisper_model_n_vocab(Pointer<whisper_context> ctx) int
    whisper_model_type(Pointer<whisper_context> ctx) int
    whisper_model_type_readable(Pointer<whisper_context> ctx) Pointer<Char>
    whisper_n_audio_ctx(Pointer<whisper_context> ctx) int
    whisper_n_len(Pointer<whisper_context> ctx) int
    whisper_n_len_from_state(Pointer<whisper_state> state) int
    whisper_n_text_ctx(Pointer<whisper_context> ctx) int
    whisper_n_vocab(Pointer<whisper_context> ctx) int
    whisper_pcm_to_mel(Pointer<whisper_context> ctx, Pointer<Float> samples, int n_samples, int n_threads) int
    Convert RAW PCM audio to log mel spectrogram. The resulting spectrogram is stored inside the default state of the provided whisper context. Returns 0 on success
    whisper_pcm_to_mel_with_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, Pointer<Float> samples, int n_samples, int n_threads) int
    whisper_print_system_info() Pointer<Char>
    Print system information
    whisper_print_timings(Pointer<whisper_context> ctx) → void
    whisper_reset_timings(Pointer<whisper_context> ctx) → void
    whisper_set_mel(Pointer<whisper_context> ctx, Pointer<Float> data, int n_len, int n_mel) int
    This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context. Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram. n_mel must be 80. Returns 0 on success
    whisper_set_mel_with_state(Pointer<whisper_context> ctx, Pointer<whisper_state> state, Pointer<Float> data, int n_len, int n_mel) int
    whisper_token_beg(Pointer<whisper_context> ctx) int
    whisper_token_count(Pointer<whisper_context> ctx, Pointer<Char> text) int
    Return the number of tokens in the provided text. Equivalent to: -whisper_tokenize(ctx, text, NULL, 0)
    whisper_token_eot(Pointer<whisper_context> ctx) int
    Special tokens
    whisper_token_lang(Pointer<whisper_context> ctx, int lang_id) int
    whisper_token_nosp(Pointer<whisper_context> ctx) int
    whisper_token_not(Pointer<whisper_context> ctx) int
    whisper_token_prev(Pointer<whisper_context> ctx) int
    whisper_token_solm(Pointer<whisper_context> ctx) int
    whisper_token_sot(Pointer<whisper_context> ctx) int
    whisper_token_to_str(Pointer<whisper_context> ctx, int token) Pointer<Char>
    Token Id -> String. Uses the vocabulary in the provided context
    whisper_token_transcribe(Pointer<whisper_context> ctx) int
    whisper_token_translate(Pointer<whisper_context> ctx) int
    Task tokens
    whisper_tokenize(Pointer<whisper_context> ctx, Pointer<Char> text, Pointer<whisper_token> tokens, int n_max_tokens) int
    Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens. Returns the number of tokens on success, no more than n_max_tokens. Returns a negative number on failure - the number of tokens that would have been returned. TODO: not sure if correct
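Tying the tokenizer calls together: whisper_token_count sizes the output buffer and whisper_tokenize fills it, per the equivalence noted above. whisper_token is int32_t in whisper.h, hence the Int32 buffer; the rest is illustrative.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';

// Assumes the generated bindings and whisper_context are imported.
void tokenizeSketch(
    WhisperDartBindings bindings, Pointer<whisper_context> ctx, String text) {
  final cText = text.toNativeUtf8().cast<Char>();

  final n = bindings.whisper_token_count(ctx, cText);
  final tokens = calloc<Int32>(n); // whisper_token is int32_t

  final written = bindings.whisper_tokenize(ctx, cText, tokens.cast(), n);
  for (var i = 0; i < written; i++) {
    final piece = bindings
        .whisper_token_to_str(ctx, tokens[i])
        .cast<Utf8>()
        .toDartString();
    print('${tokens[i]} -> $piece');
  }

  calloc.free(tokens);
  malloc.free(cText);
}
```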

    Operators

    operator ==(Object other) bool
    The equality operator.
    inherited