WhisperDartBindings class
Bindings for src/whisper4dart.h.
Regenerate bindings with flutter pub run ffigen --config ffigen.yaml.

Constructors
- WhisperDartBindings.new(DynamicLibrary dynamicLibrary)
  The symbols are looked up in dynamicLibrary.
- WhisperDartBindings.fromLookup(Pointer<T> Function<T extends NativeType>(String symbolName) lookup)
  The symbols are looked up with lookup.
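For reference, a minimal sketch of constructing the bindings; the import path and the native library file names are assumptions and depend on how your build produces the whisper library:

  import 'dart:ffi';
  import 'dart:io' show Platform;
  import 'package:whisper4dart/whisper4dart.dart'; // assumed import path

  WhisperDartBindings loadBindings() {
    // Library file names are assumptions; use whatever your build produces.
    final DynamicLibrary lib;
    if (Platform.isAndroid || Platform.isLinux) {
      lib = DynamicLibrary.open('libwhisper4dart.so');
    } else if (Platform.isWindows) {
      lib = DynamicLibrary.open('whisper4dart.dll');
    } else {
      lib = DynamicLibrary.process(); // iOS/macOS with statically linked symbols
    }
    return WhisperDartBindings(lib);
  }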
Properties
- GGML_TENSOR_SIZE ↔ int
-
getter/setter pair
- hashCode → int
-
The hash code for this object.
no setter, inherited
- runtimeType → Type
-
A representation of the runtime type of the object.
no setter, inherited
Methods
-
ggml_abort(
Pointer< Char> file, int line, Pointer<Char> fmt) → void -
ggml_abs(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_abs_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_acc(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> - dst = a; view(dst, nb1, nb2, nb3, offset) += b; return dst
-
ggml_acc_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> -
ggml_add(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add1(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add1_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add_cast(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int type) → Pointer<ggml_tensor> -
ggml_add_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add_rel_pos(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) → Pointer<ggml_tensor> - used in sam
-
ggml_add_rel_pos_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) → Pointer<ggml_tensor> -
ggml_arange(
Pointer< ggml_context> ctx, double start, double stop, double step) → Pointer<ggml_tensor> -
ggml_are_same_shape(
Pointer< ggml_tensor> t0, Pointer<ggml_tensor> t1) → bool -
ggml_are_same_stride(
Pointer< ggml_tensor> t0, Pointer<ggml_tensor> t1) → bool -
ggml_argmax(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - argmax along rows
-
ggml_argsort(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int order) → Pointer<ggml_tensor> -
ggml_backend_alloc_buffer(
ggml_backend_t backend, int size) → ggml_backend_buffer_t -
ggml_backend_buffer_clear(
ggml_backend_buffer_t buffer, int value) → void -
ggml_backend_buffer_free(
ggml_backend_buffer_t buffer) → void -
ggml_backend_buffer_get_alignment(
ggml_backend_buffer_t buffer) → int -
ggml_backend_buffer_get_alloc_size(
ggml_backend_buffer_t buffer, Pointer< ggml_tensor> tensor) → int -
ggml_backend_buffer_get_base(
ggml_backend_buffer_t buffer) → Pointer< Void> -
ggml_backend_buffer_get_max_size(
ggml_backend_buffer_t buffer) → int -
ggml_backend_buffer_get_size(
ggml_backend_buffer_t buffer) → int -
ggml_backend_buffer_get_type(
ggml_backend_buffer_t buffer) → ggml_backend_buffer_type_t -
ggml_backend_buffer_get_usage(
ggml_backend_buffer_t buffer) → int -
ggml_backend_buffer_init_tensor(
ggml_backend_buffer_t buffer, Pointer< ggml_tensor> tensor) → void -
ggml_backend_buffer_is_host(
ggml_backend_buffer_t buffer) → bool -
ggml_backend_buffer_name(
ggml_backend_buffer_t buffer) → Pointer< Char> -
ggml_backend_buffer_reset(
ggml_backend_buffer_t buffer) → void -
ggml_backend_buffer_set_usage(
ggml_backend_buffer_t buffer, int usage) → void -
ggml_backend_buft_alloc_buffer(
ggml_backend_buffer_type_t buft, int size) → ggml_backend_buffer_t -
ggml_backend_buft_get_alignment(
ggml_backend_buffer_type_t buft) → int -
ggml_backend_buft_get_alloc_size(
ggml_backend_buffer_type_t buft, Pointer< ggml_tensor> tensor) → int -
ggml_backend_buft_get_device(
ggml_backend_buffer_type_t buft) → ggml_backend_dev_t -
ggml_backend_buft_get_max_size(
ggml_backend_buffer_type_t buft) → int -
ggml_backend_buft_is_host(
ggml_backend_buffer_type_t buft) → bool -
ggml_backend_buft_name(
ggml_backend_buffer_type_t buft) → Pointer< Char> - Backend buffer type
-
ggml_backend_compare_graph_backend(
ggml_backend_t backend1, ggml_backend_t backend2, Pointer< ggml_cgraph> graph, ggml_backend_eval_callback callback, Pointer<Void> user_data) → bool - Compare the output of two backends
-
ggml_backend_cpu_buffer_from_ptr(
Pointer< Void> ptr, int size) → ggml_backend_buffer_t - CPU buffer types are always available
-
ggml_backend_cpu_buffer_type(
) → ggml_backend_buffer_type_t -
ggml_backend_cpu_init(
) → ggml_backend_t - CPU backend
-
ggml_backend_cpu_reg(
) → ggml_backend_reg_t -
ggml_backend_cpu_set_abort_callback(
ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, Pointer< Void> abort_callback_data) → void -
ggml_backend_cpu_set_n_threads(
ggml_backend_t backend_cpu, int n_threads) → void -
ggml_backend_cpu_set_threadpool(
ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) → void -
ggml_backend_dev_backend_reg(
ggml_backend_dev_t device) → ggml_backend_reg_t -
ggml_backend_dev_buffer_from_host_ptr(
ggml_backend_dev_t device, Pointer< Void> ptr, int size, int max_tensor_size) → ggml_backend_buffer_t -
ggml_backend_dev_buffer_type(
ggml_backend_dev_t device) → ggml_backend_buffer_type_t -
ggml_backend_dev_by_name(
Pointer< Char> name) → ggml_backend_dev_t -
ggml_backend_dev_by_type(
int type) → ggml_backend_dev_t -
ggml_backend_dev_count(
) → int - Device enumeration
-
ggml_backend_dev_description(
ggml_backend_dev_t device) → Pointer< Char> -
ggml_backend_dev_get(
int index) → ggml_backend_dev_t -
ggml_backend_dev_get_props(
ggml_backend_dev_t device, Pointer< ggml_backend_dev_props> props) → void -
ggml_backend_dev_host_buffer_type(
ggml_backend_dev_t device) → ggml_backend_buffer_type_t -
ggml_backend_dev_init(
ggml_backend_dev_t device, Pointer< Char> params) → ggml_backend_t -
ggml_backend_dev_memory(
ggml_backend_dev_t device, Pointer< Size> free, Pointer<Size> total) → void -
ggml_backend_dev_name(
ggml_backend_dev_t device) → Pointer< Char> -
ggml_backend_dev_offload_op(
ggml_backend_dev_t device, Pointer< ggml_tensor> op) → bool -
ggml_backend_dev_supports_buft(
ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) → bool -
ggml_backend_dev_supports_op(
ggml_backend_dev_t device, Pointer< ggml_tensor> op) → bool -
ggml_backend_dev_type1(
ggml_backend_dev_t device) → int -
ggml_backend_device_register(
ggml_backend_dev_t device) → void - Backend registry
-
ggml_backend_event_free(
ggml_backend_event_t event) → void -
ggml_backend_event_new(
ggml_backend_dev_t device) → ggml_backend_event_t - Events
-
ggml_backend_event_record(
ggml_backend_event_t event, ggml_backend_t backend) → void -
ggml_backend_event_synchronize(
ggml_backend_event_t event) → void -
ggml_backend_event_wait(
ggml_backend_t backend, ggml_backend_event_t event) → void -
ggml_backend_free(
ggml_backend_t backend) → void -
ggml_backend_get_alignment(
ggml_backend_t backend) → int -
ggml_backend_get_default_buffer_type(
ggml_backend_t backend) → ggml_backend_buffer_type_t -
ggml_backend_get_device(
ggml_backend_t backend) → ggml_backend_dev_t -
ggml_backend_get_max_size(
ggml_backend_t backend) → int -
ggml_backend_graph_compute(
ggml_backend_t backend, Pointer< ggml_cgraph> cgraph) → int -
ggml_backend_graph_compute_async(
ggml_backend_t backend, Pointer< ggml_cgraph> cgraph) → int -
ggml_backend_graph_copy1(
ggml_backend_t backend, Pointer< ggml_cgraph> graph) → ggml_backend_graph_copy - Copy a graph to a different backend
-
ggml_backend_graph_copy_free(
ggml_backend_graph_copy copy) → void -
ggml_backend_graph_plan_compute(
ggml_backend_t backend, ggml_backend_graph_plan_t plan) → int -
ggml_backend_graph_plan_create(
ggml_backend_t backend, Pointer< ggml_cgraph> cgraph) → ggml_backend_graph_plan_t -
ggml_backend_graph_plan_free(
ggml_backend_t backend, ggml_backend_graph_plan_t plan) → void -
ggml_backend_guid(
ggml_backend_t backend) → ggml_guid_t - Backend (stream)
-
ggml_backend_init_best(
) → ggml_backend_t - = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
-
ggml_backend_init_by_name(
Pointer< Char> name, Pointer<Char> params) → ggml_backend_t - Direct backend (stream) initialization = ggml_backend_dev_init(ggml_backend_dev_by_name(name), params)
-
ggml_backend_init_by_type(
int type, Pointer< Char> params) → ggml_backend_t - = ggml_backend_dev_init(ggml_backend_dev_by_type(type), params)
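A hedged sketch of bringing up a backend through these helpers (assumes a bindings instance constructed as shown under Constructors):

  void initBestBackend(WhisperDartBindings bindings) {
    // Register any dynamically built backends first (see ggml_backend_load_all).
    bindings.ggml_backend_load_all();
    // GPU device if one is available, otherwise CPU.
    final backend = bindings.ggml_backend_init_best();
    // ... use the backend ...
    bindings.ggml_backend_free(backend);
  }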
-
ggml_backend_is_cpu(
ggml_backend_t backend) → bool -
ggml_backend_load(
Pointer< Char> path) → ggml_backend_reg_t - Load a backend from a dynamic library and register it
-
ggml_backend_load_all(
) → void - Load all known backends from dynamic libraries
-
ggml_backend_load_all_from_path(
Pointer< Char> dir_path) → void -
ggml_backend_name(
ggml_backend_t backend) → Pointer< Char> -
ggml_backend_offload_op(
ggml_backend_t backend, Pointer< ggml_tensor> op) → bool -
ggml_backend_reg_by_name(
Pointer< Char> name) → ggml_backend_reg_t -
ggml_backend_reg_count(
) → int - Backend (reg) enumeration
-
ggml_backend_reg_dev_count(
ggml_backend_reg_t reg) → int -
ggml_backend_reg_dev_get(
ggml_backend_reg_t reg, int index) → ggml_backend_dev_t -
ggml_backend_reg_get(
int index) → ggml_backend_reg_t -
ggml_backend_reg_get_proc_address(
ggml_backend_reg_t reg, Pointer< Char> name) → Pointer<Void> -
ggml_backend_reg_name(
ggml_backend_reg_t reg) → Pointer< Char> - Backend (reg)
-
ggml_backend_sched_alloc_graph(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> graph) → bool - Allocate and compute graph on the backend scheduler
-
ggml_backend_sched_free(
ggml_backend_sched_t sched) → void -
ggml_backend_sched_get_backend(
ggml_backend_sched_t sched, int i) → ggml_backend_t -
ggml_backend_sched_get_buffer_size(
ggml_backend_sched_t sched, ggml_backend_t backend) → int -
ggml_backend_sched_get_n_backends(
ggml_backend_sched_t sched) → int -
ggml_backend_sched_get_n_copies(
ggml_backend_sched_t sched) → int -
ggml_backend_sched_get_n_splits(
ggml_backend_sched_t sched) → int - Get the number of splits of the last graph
-
ggml_backend_sched_get_tensor_backend(
ggml_backend_sched_t sched, Pointer< ggml_tensor> node) → ggml_backend_t -
ggml_backend_sched_graph_compute(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> graph) → int -
ggml_backend_sched_graph_compute_async(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> graph) → int -
ggml_backend_sched_new(
Pointer< ggml_backend_t> backends, Pointer<ggml_backend_buffer_type_t> bufts, int n_backends, int graph_size, bool parallel) → ggml_backend_sched_t - Initialize a backend scheduler, backends with low index are given priority over backends with high index
-
ggml_backend_sched_reserve(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> measure_graph) → bool - Initialize backend buffers from a measure graph
-
ggml_backend_sched_reset(
ggml_backend_sched_t sched) → void - Reset all assignments and allocators - must be called before changing the node backends or allocating a new graph. This in effect deallocates all tensors that were previously allocated and leaves them with dangling pointers. The correct way to use this API is to discard the deallocated tensors and create new ones.
-
ggml_backend_sched_set_eval_callback(
ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, Pointer< Void> user_data) → void - Set a callback to be called for each resulting node during graph compute
-
ggml_backend_sched_set_tensor_backend(
ggml_backend_sched_t sched, Pointer< ggml_tensor> node, ggml_backend_t backend) → void -
ggml_backend_sched_synchronize(
ggml_backend_sched_t sched) → void -
ggml_backend_supports_buft(
ggml_backend_t backend, ggml_backend_buffer_type_t buft) → bool -
ggml_backend_supports_op(
ggml_backend_t backend, Pointer< ggml_tensor> op) → bool - NOTE: will be removed, use device version instead
-
ggml_backend_synchronize(
ggml_backend_t backend) → void -
ggml_backend_tensor_alloc(
ggml_backend_buffer_t buffer, Pointer< ggml_tensor> tensor, Pointer<Void> addr) → void - Tensor initialization
-
ggml_backend_tensor_copy(
Pointer< ggml_tensor> src, Pointer<ggml_tensor> dst) → void - tensor copy between different backends
-
ggml_backend_tensor_copy_async(
ggml_backend_t backend_src, ggml_backend_t backend_dst, Pointer<ggml_tensor> src, Pointer<ggml_tensor> dst) → void - asynchronous copy: the copy is performed after all the currently queued operations in backend_src; backend_dst will wait for the copy to complete before performing other operations; automatic fallback to sync copy if async is not supported
-
ggml_backend_tensor_get(
Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void -
ggml_backend_tensor_get_async(
ggml_backend_t backend, Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void -
ggml_backend_tensor_memset(
Pointer< ggml_tensor> tensor, int value, int offset, int size) → void -
ggml_backend_tensor_set(
Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void - "offset" refers to the offset in tensor->data for setting/getting data
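A hedged sketch of the set/get round trip (assumes an F32 tensor with at least values.length elements, already allocated in a backend buffer):

  import 'dart:ffi';
  import 'package:ffi/ffi.dart';

  void roundTrip(WhisperDartBindings bindings, Pointer<ggml_tensor> tensor, List<double> values) {
    final n = values.length;
    final host = calloc<Float>(n);
    for (var i = 0; i < n; i++) {
      host[i] = values[i];
    }
    // offset is a byte offset into tensor->data; size is in bytes.
    bindings.ggml_backend_tensor_set(tensor, host.cast(), 0, n * sizeOf<Float>());

    final out = calloc<Float>(n);
    bindings.ggml_backend_tensor_get(tensor, out.cast(), 0, n * sizeOf<Float>());

    calloc.free(host);
    calloc.free(out);
  }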
-
ggml_backend_tensor_set_async(
ggml_backend_t backend, Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void -
ggml_backend_unload(
ggml_backend_reg_t reg) → void - Unload a backend if loaded dynamically and unregister it
-
ggml_backend_view_init(
Pointer< ggml_tensor> tensor) → void -
ggml_bf16_to_fp32(
ggml_bf16_t arg0) → double -
ggml_bf16_to_fp32_row(
Pointer< ggml_bf16_t> arg0, Pointer<Float> arg1, int arg2) → void -
ggml_blck_size(
int type) → int -
ggml_build_backward_expand(
Pointer< ggml_context> ctx_static, Pointer<ggml_context> ctx_compute, Pointer<ggml_cgraph> cgraph, bool accumulate) → void -
ggml_build_forward_expand(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void - automatic differentiation
-
ggml_can_repeat(
Pointer< ggml_tensor> t0, Pointer<ggml_tensor> t1) → bool -
ggml_cast(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int type) → Pointer<ggml_tensor> -
ggml_clamp(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double min, double max) → Pointer<ggml_tensor> - clamp in-place, returns view(a)
-
ggml_concat(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int dim) → Pointer<ggml_tensor> - concat a and b along dim used in stable-diffusion
-
ggml_cont(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - make contiguous
-
ggml_cont_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) → Pointer<ggml_tensor> - make contiguous, with new shape
-
ggml_cont_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) → Pointer<ggml_tensor> -
ggml_cont_3d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) → Pointer<ggml_tensor> -
ggml_cont_4d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> -
ggml_conv_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) → Pointer<ggml_tensor> -
ggml_conv_1d_dw(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) → Pointer<ggml_tensor> - depthwise TODO: this is very likely wrong for some cases! - needs more testing
-
ggml_conv_1d_dw_ph(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int d0) → Pointer<ggml_tensor> -
ggml_conv_1d_ph(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s, int d) → Pointer<ggml_tensor> - conv_1d with padding = half; alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-
ggml_conv_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) → Pointer<ggml_tensor> -
ggml_conv_2d_dw(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) → Pointer<ggml_tensor> - depthwise
-
ggml_conv_2d_s1_ph(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
kernel size is a->ne[0] x a->ne[1], stride is 1, padding is half. example: a: 3 3 256 256, b: 64 64 256 1, res: 64 64 256 1. used in sam
-
ggml_conv_2d_sk_p0(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
kernel size is a->ne[0] x a->ne[1], stride is equal to kernel size, padding is zero. example: a: 16 16 3 768, b: 1024 1024 3 1, res: 64 64 768 1. used in sam
-
ggml_conv_transpose_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) → Pointer<ggml_tensor> -
ggml_conv_transpose_2d_p0(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int stride) → Pointer<ggml_tensor> -
ggml_cos(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_cos_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_count_equal(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - count number of equal elements in a and b
-
ggml_cpu_get_sve_cnt(
) → int -
ggml_cpu_has_amx_int8(
) → int -
ggml_cpu_has_arm_fma(
) → int -
ggml_cpu_has_avx(
) → int -
ggml_cpu_has_avx2(
) → int -
ggml_cpu_has_avx512(
) → int -
ggml_cpu_has_avx512_bf16(
) → int -
ggml_cpu_has_avx512_vbmi(
) → int -
ggml_cpu_has_avx512_vnni(
) → int -
ggml_cpu_has_avx_vnni(
) → int -
ggml_cpu_has_dotprod(
) → int -
ggml_cpu_has_f16c(
) → int -
ggml_cpu_has_fma(
) → int -
ggml_cpu_has_fp16_va(
) → int -
ggml_cpu_has_llamafile(
) → int -
ggml_cpu_has_matmul_int8(
) → int -
ggml_cpu_has_neon(
) → int - ARM
-
ggml_cpu_has_riscv_v(
) → int - other
-
ggml_cpu_has_sse3(
) → int - x86
-
ggml_cpu_has_ssse3(
) → int -
ggml_cpu_has_sve(
) → int -
ggml_cpu_has_vsx(
) → int -
ggml_cpu_has_wasm_simd(
) → int -
ggml_cpu_init(
) → void -
ggml_cpy(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - a -> b, return view(b)
-
ggml_cross_entropy_loss(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - loss function
-
ggml_cross_entropy_loss_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) → Pointer<ggml_tensor> -
ggml_cycles(
) → int -
ggml_cycles_per_ms(
) → int -
ggml_diag(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_diag_mask_inf(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> - set elements above the diagonal to -INF
-
ggml_diag_mask_inf_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_diag_mask_zero(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> - set elements above the diagonal to 0
-
ggml_diag_mask_zero_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_div(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_div_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_dup(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - operations on tensors with backpropagation
-
ggml_dup_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_dup_tensor(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> src) → Pointer<ggml_tensor> -
ggml_element_size(
Pointer< ggml_tensor> tensor) → int -
ggml_elu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_elu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_exp(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_exp_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_flash_attn_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> d, bool masked) → Pointer<ggml_tensor> - TODO: needs to be adapted to ggml_flash_attn_ext
-
ggml_flash_attn_ext(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> mask, double scale, double max_bias, double logit_softcap) → Pointer<ggml_tensor> -
q: [n_embd, n_batch, n_head, 1]
k: [n_embd, n_kv, n_head_kv, 1]
v: [n_embd, n_kv, n_head_kv, 1] !! not transposed !!
mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
res: [n_embd, n_head, n_batch, 1] !! permuted !!
-
ggml_flash_attn_ext_get_prec(
Pointer< ggml_tensor> a) → int -
ggml_flash_attn_ext_set_prec(
Pointer< ggml_tensor> a, int prec) → void -
ggml_fopen(
Pointer< Char> fname, Pointer<Char> mode) → Pointer<FILE> - accepts a UTF-8 path, even on Windows
-
ggml_format_name(
Pointer< ggml_tensor> tensor, Pointer<Char> fmt) → Pointer<ggml_tensor> -
ggml_fp16_to_fp32(
int arg0) → double -
ggml_fp16_to_fp32_row(
Pointer< ggml_fp16_t> arg0, Pointer<Float> arg1, int arg2) → void -
ggml_fp32_to_bf16(
double arg0) → ggml_bf16_t -
ggml_fp32_to_bf16_row(
Pointer< Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void -
ggml_fp32_to_bf16_row_ref(
Pointer< Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void -
ggml_fp32_to_fp16(
double arg0) → int -
ggml_fp32_to_fp16_row(
Pointer< Float> arg0, Pointer<ggml_fp16_t> arg1, int arg2) → void -
ggml_free(
Pointer< ggml_context> ctx) → void -
ggml_ftype_to_ggml_type(
int ftype) → int - TODO: temporary until model loading of ggml examples is refactored
-
ggml_gated_linear_attn(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> q, Pointer<ggml_tensor> g, Pointer<ggml_tensor> state, double scale) → Pointer<ggml_tensor> -
ggml_gelu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_gelu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_gelu_quick(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_gelu_quick_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_get_data(
Pointer< ggml_tensor> tensor) → Pointer<Void> -
ggml_get_data_f32(
Pointer< ggml_tensor> tensor) → Pointer<Float> -
ggml_get_f32_1d(
Pointer< ggml_tensor> tensor, int i) → double -
ggml_get_f32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3) → double -
ggml_get_first_tensor(
Pointer< ggml_context> ctx) → Pointer<ggml_tensor> - Context tensor enumeration and lookup
-
ggml_get_i32_1d(
Pointer< ggml_tensor> tensor, int i) → int -
ggml_get_i32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3) → int -
ggml_get_max_tensor_size(
Pointer< ggml_context> ctx) → int -
ggml_get_mem_buffer(
Pointer< ggml_context> ctx) → Pointer<Void> -
ggml_get_mem_size(
Pointer< ggml_context> ctx) → int -
ggml_get_name(
Pointer< ggml_tensor> tensor) → Pointer<Char> -
ggml_get_next_tensor(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> tensor) → Pointer<ggml_tensor> -
ggml_get_no_alloc(
Pointer< ggml_context> ctx) → bool -
ggml_get_rel_pos(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int qh, int kh) → Pointer<ggml_tensor> - used in sam
-
ggml_get_rows(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
supports 3D: a->ne[2] == b->ne[1]
-
ggml_get_rows_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) → Pointer<ggml_tensor> -
ggml_get_tensor(
Pointer< ggml_context> ctx, Pointer<Char> name) → Pointer<ggml_tensor> -
ggml_get_type_traits(
int type) → Pointer< ggml_type_traits> -
ggml_get_type_traits_cpu(
int type) → Pointer< ggml_type_traits_cpu> -
ggml_get_unary_op(
Pointer< ggml_tensor> tensor) → int -
ggml_graph_add_node(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void -
ggml_graph_clear(
Pointer< ggml_cgraph> cgraph) → void -
ggml_graph_compute(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_cplan> cplan) → int -
ggml_graph_compute_with_ctx(
Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph, int n_threads) → int - same as ggml_graph_compute(), but the work data is allocated as part of the context. note: the drawback of this API is that you must ensure that the context has enough memory for the work data
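A hedged sketch of building and computing a small graph inside one context, so the work data also lives in that context. The 16 MiB context size and the plain-int GGML_TYPE_F32 constant are assumptions, and the ggml_init_params field names are assumed to follow the C header:

  import 'dart:ffi';
  import 'package:ffi/ffi.dart';

  const ggmlTypeF32 = 0; // GGML_TYPE_F32 in the C enum (assumption)

  void tinyMatMul(WhisperDartBindings bindings) {
    final params = calloc<ggml_init_params>();
    params.ref.mem_size = 16 * 1024 * 1024; // enough for tensors + graph + work data
    params.ref.mem_buffer = nullptr;
    params.ref.no_alloc = false;

    final ctx = bindings.ggml_init(params.ref);

    // A: k=2 columns, n=4 rows; B: k=2 columns, m=3 rows => result: 4 columns, 3 rows
    // (see the shape notes on ggml_mul_mat below).
    final a = bindings.ggml_new_tensor_2d(ctx, ggmlTypeF32, 2, 4);
    final b = bindings.ggml_new_tensor_2d(ctx, ggmlTypeF32, 2, 3);
    bindings.ggml_set_f32(a, 1.0);
    bindings.ggml_set_f32(b, 2.0);
    final c = bindings.ggml_mul_mat(ctx, a, b);

    final graph = bindings.ggml_new_graph(ctx);
    bindings.ggml_build_forward_expand(graph, c);
    bindings.ggml_graph_compute_with_ctx(ctx, graph, 1 /* n_threads */);

    final out = bindings.ggml_get_data_f32(c); // 4 * 3 floats, each equal to 4.0 here
    print(out[0]);

    bindings.ggml_free(ctx);
    calloc.free(params);
  }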
-
ggml_graph_cpy(
Pointer< ggml_cgraph> src, Pointer<ggml_cgraph> dst) → void -
ggml_graph_dump_dot(
Pointer< ggml_cgraph> gb, Pointer<ggml_cgraph> gf, Pointer<Char> filename) → void - dump the graph into a file using the dot format
-
ggml_graph_dup(
Pointer< ggml_context> ctx, Pointer<ggml_cgraph> cgraph) → Pointer<ggml_cgraph> -
ggml_graph_export(
Pointer< ggml_cgraph> cgraph, Pointer<Char> fname) → void -
ggml_graph_get_grad(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> node) → Pointer<ggml_tensor> -
ggml_graph_get_grad_acc(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> node) → Pointer<ggml_tensor> -
ggml_graph_get_tensor(
Pointer< ggml_cgraph> cgraph, Pointer<Char> name) → Pointer<ggml_tensor> -
ggml_graph_import(
Pointer<Char> fname, Pointer<Pointer<ggml_context>> ctx_data, Pointer<Pointer<ggml_context>> ctx_eval) → Pointer<ggml_cgraph> -
ggml_graph_n_nodes(
Pointer< ggml_cgraph> cgraph) → int -
ggml_graph_node(
Pointer< ggml_cgraph> cgraph, int i) → Pointer<ggml_tensor> -
ggml_graph_nodes(
Pointer< ggml_cgraph> cgraph) → Pointer<Pointer< ggml_tensor> > -
ggml_graph_overhead(
) → int -
ggml_graph_overhead_custom(
int size, bool grads) → int -
ggml_graph_plan(
Pointer<ggml_cgraph> cgraph, int n_threads, Pointer<ggml_threadpool> threadpool) → ggml_cplan - ggml_graph_plan() has to be called before ggml_graph_compute(); when plan.work_size > 0, the caller must allocate memory for plan.work_data
-
ggml_graph_print(
Pointer< ggml_cgraph> cgraph) → void - print info and performance information for the graph
-
ggml_graph_reset(
Pointer< ggml_cgraph> cgraph) → void -
ggml_graph_size(
Pointer< ggml_cgraph> cgraph) → int -
ggml_group_norm(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) → Pointer<ggml_tensor> - group normalize along ne0*ne1*n_groups, used in stable-diffusion
-
ggml_group_norm_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) → Pointer<ggml_tensor> -
ggml_guid_matches(
ggml_guid_t guid_a, ggml_guid_t guid_b) → bool -
ggml_hardsigmoid(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - hardsigmoid(x) = relu6(x + 3) / 6
-
ggml_hardswish(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - hardswish(x) = x * relu6(x + 3) / 6
-
ggml_im2col(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D, int dst_type) → Pointer<ggml_tensor> - im2col converts data into a format that effectively results in a convolution when combined with matrix multiplication
-
ggml_im2col_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<Int64> ne, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D) → Pointer<ggml_tensor> -
ggml_init(
ggml_init_params params) → Pointer< ggml_context> - main
-
ggml_is_3d(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous_0(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous_1(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous_2(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_empty(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_matrix(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_numa(
) → bool -
ggml_is_permuted(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_quantized(
int type) → bool -
ggml_is_scalar(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_transposed(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_vector(
Pointer< ggml_tensor> tensor) → bool -
ggml_leaky_relu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double negative_slope, bool inplace) → Pointer<ggml_tensor> -
ggml_log(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_log_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_log_set(
ggml_log_callback log_callback, Pointer< Void> user_data) → void - Set callback for all future logging events. If this is not called, or NULL is supplied, everything is output on stderr.
-
ggml_map_binary_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_binary_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom1(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> - n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
-
ggml_map_custom1_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom1_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom1_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom2(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom2_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom2_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom2_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom3(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom3_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom3_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom3_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_unary_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_unary_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_mean(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - mean along rows
-
ggml_mul(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_mul_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_mul_mat(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
A: k columns, n rows => [ne03, ne02, n, k]
B: k columns, m rows (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]
result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
-
ggml_mul_mat_id(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> as1, Pointer<ggml_tensor> b, Pointer<ggml_tensor> ids) → Pointer<ggml_tensor> - indirect matrix multiplication
-
ggml_mul_mat_set_prec(
Pointer<ggml_tensor> a, int prec) → void - change the precision of a matrix multiplication; set to GGML_PREC_F32 for higher precision (useful for phi-2)
-
ggml_n_dims(
Pointer< ggml_tensor> tensor) → int -
ggml_nbytes(
Pointer< ggml_tensor> tensor) → int -
ggml_nbytes_pad(
Pointer< ggml_tensor> tensor) → int -
ggml_neg(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_neg_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_nelements(
Pointer< ggml_tensor> tensor) → int -
ggml_new_buffer(
Pointer< ggml_context> ctx, int nbytes) → Pointer<Void> -
ggml_new_f32(
Pointer< ggml_context> ctx, double value) → Pointer<ggml_tensor> -
ggml_new_graph(
Pointer< ggml_context> ctx) → Pointer<ggml_cgraph> - graph allocation in a context
-
ggml_new_graph_custom(
Pointer< ggml_context> ctx, int size, bool grads) → Pointer<ggml_cgraph> -
ggml_new_i32(
Pointer< ggml_context> ctx, int value) → Pointer<ggml_tensor> -
ggml_new_tensor(
Pointer< ggml_context> ctx, int type, int n_dims, Pointer<Int64> ne) → Pointer<ggml_tensor> -
ggml_new_tensor_1d(
Pointer< ggml_context> ctx, int type, int ne0) → Pointer<ggml_tensor> -
ggml_new_tensor_2d(
Pointer< ggml_context> ctx, int type, int ne0, int ne1) → Pointer<ggml_tensor> -
ggml_new_tensor_3d(
Pointer< ggml_context> ctx, int type, int ne0, int ne1, int ne2) → Pointer<ggml_tensor> -
ggml_new_tensor_4d(
Pointer< ggml_context> ctx, int type, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> -
ggml_norm(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> - normalize along rows
-
ggml_norm_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> -
ggml_nrows(
Pointer< ggml_tensor> tensor) → int -
ggml_numa_init(
int numa) → void -
ggml_op_desc(
Pointer< ggml_tensor> t) → Pointer<Char> -
ggml_op_name(
int op) → Pointer< Char> -
ggml_op_symbol(
int op) → Pointer< Char> -
ggml_opt_step_adamw(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> grad, Pointer<ggml_tensor> m, Pointer<ggml_tensor> v, Pointer<ggml_tensor> adamw_params) → Pointer<ggml_tensor> - AdamW optimizer step Paper: https://arxiv.org/pdf/1711.05101v3.pdf PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
-
ggml_out_prod(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - A: m columns, n rows, B: p columns, n rows, result is m columns, p rows
-
ggml_pad(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1, int p2, int p3) → Pointer<ggml_tensor> -
pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
-
ggml_pad_reflect_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1) → Pointer<ggml_tensor> -
pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
-
ggml_permute(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int axis0, int axis1, int axis2, int axis3) → Pointer<ggml_tensor> -
ggml_pool_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int op, int k0, int s0, int p0) → Pointer<ggml_tensor> -
ggml_pool_2d(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op, int k0, int k1, int s0, int s1, double p0, double p1) → Pointer<ggml_tensor> - the result will have 2*p0 padding for the first dimension and 2*p1 padding for the second dimension
-
ggml_pool_2d_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> af, int op, int k0, int k1, int s0, int s1, double p0, double p1) → Pointer<ggml_tensor> -
ggml_print_object(
Pointer< ggml_object> obj) → void -
ggml_print_objects(
Pointer< ggml_context> ctx) → void -
ggml_quantize_chunk(
int type, Pointer< Float> src, Pointer<Void> dst, int start, int nrows, int n_per_row, Pointer<Float> imatrix) → int - calls ggml_quantize_init internally (i.e. can allocate memory)
-
ggml_quantize_free(
) → void -
ggml_quantize_init(
int type) → void -
ggml_quantize_init can be called multiple times with the same type; it will only initialize the quantization tables for the first call or after ggml_quantize_free (it is automatically called by ggml_quantize_chunk for convenience).
ggml_quantize_free will free any memory allocated by ggml_quantize_init; call this at the end of the program to avoid memory leaks
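A hedged sketch of quantizing rows of f32 data with ggml_quantize_chunk; type is a ggml_type enum value, and some types additionally require an importance matrix (see ggml_quantize_requires_imatrix), so passing nullptr here is only valid for the others:

  import 'dart:ffi';
  import 'package:ffi/ffi.dart';

  Pointer<Void> quantizeRows(WhisperDartBindings bindings, Pointer<Float> src,
      int nrows, int nPerRow, int type) {
    // Destination size for the quantized layout of this type.
    final dstBytes = bindings.ggml_row_size(type, nPerRow) * nrows;
    final dst = calloc<Uint8>(dstBytes);
    // Initializes the quantization tables on first use and returns bytes written.
    bindings.ggml_quantize_chunk(type, src, dst.cast(), 0, nrows, nPerRow, nullptr);
    return dst.cast();
  }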
-
ggml_quantize_requires_imatrix(
int type) → bool - some quantization type cannot be used without an importance matrix
-
ggml_relu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_relu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_repeat(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - if a is the same shape as b, and a is not a parameter, return a; otherwise, return a new tensor: repeat(a) to fit in b
-
ggml_repeat_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - sums repetitions in a into shape of b
-
ggml_reset(
Pointer< ggml_context> ctx) → void -
ggml_reshape(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - return view(a), b specifies the new shape TODO: when we start computing gradient, make a copy instead of view
-
ggml_reshape_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) → Pointer<ggml_tensor> - return view(a) TODO: when we start computing gradient, make a copy instead of view
-
ggml_reshape_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) → Pointer<ggml_tensor> -
ggml_reshape_3d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) → Pointer<ggml_tensor> - return view(a) TODO: when we start computing gradient, make a copy instead of view
-
ggml_reshape_4d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> -
ggml_rms_norm(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> -
ggml_rms_norm_back(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double eps) → Pointer<ggml_tensor> - a - x, b - dy
-
ggml_rms_norm_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> -
ggml_rope(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) → Pointer<ggml_tensor> - rotary position embedding; if (mode & 1) - skip n_past elements (NOT SUPPORTED); if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
-
ggml_rope_custom(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_custom_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_ext(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> - custom RoPE c is freq factors (e.g. phi3-128k), (optional)
-
ggml_rope_ext_back(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> - rotary position embedding backward, i.e. compute dx from dy; a - dy
-
ggml_rope_ext_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_rope_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_rope_multi(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, Pointer<Int> sections, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_multi_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, Pointer<Int> sections, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_yarn_corr_dims(
int n_dims, int n_ctx_orig, double freq_base, double beta_fast, double beta_slow, Pointer< Float> dims) → void - compute correction dims for YaRN RoPE scaling
-
ggml_row_size(
int type, int ne) → int -
ggml_rwkv_wkv6(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> r, Pointer<ggml_tensor> tf, Pointer<ggml_tensor> td, Pointer<ggml_tensor> state) → Pointer<ggml_tensor> -
ggml_scale(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double s) → Pointer<ggml_tensor> - operations on tensors without backpropagation
-
ggml_scale_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double s) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_set(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> - b -> view(a,offset,nb1,nb2,nb3), return modified a
-
ggml_set_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) → Pointer<ggml_tensor> -
ggml_set_1d_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) → Pointer<ggml_tensor> -
ggml_set_2d(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) → Pointer<ggml_tensor> - b -> view(a,offset,nb1,nb2,nb3), return modified a
-
ggml_set_2d_inplace(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) → Pointer<ggml_tensor> - b -> view(a,offset,nb1,nb2,nb3), return view(a)
-
ggml_set_f32(
Pointer< ggml_tensor> tensor, double value) → Pointer<ggml_tensor> -
ggml_set_f32_1d(
Pointer< ggml_tensor> tensor, int i, double value) → void -
ggml_set_f32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3, double value) → void -
ggml_set_i32(
Pointer< ggml_tensor> tensor, int value) → Pointer<ggml_tensor> -
ggml_set_i32_1d(
Pointer< ggml_tensor> tensor, int i, int value) → void -
ggml_set_i32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3, int value) → void -
ggml_set_inplace(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> - b -> view(a,offset,nb1,nb2,nb3), return view(a)
-
ggml_set_input(
Pointer< ggml_tensor> tensor) → void - Tensor flags
-
ggml_set_loss(
Pointer< ggml_tensor> tensor) → void -
ggml_set_name(
Pointer< ggml_tensor> tensor, Pointer<Char> name) → Pointer<ggml_tensor> -
ggml_set_no_alloc(
Pointer< ggml_context> ctx, bool no_alloc) → void -
ggml_set_output(
Pointer< ggml_tensor> tensor) → void -
ggml_set_param(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> tensor) → void -
ggml_set_zero(
Pointer< ggml_tensor> tensor) → Pointer<ggml_tensor> -
ggml_sgn(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sgn_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sigmoid(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sigmoid_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_silu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_silu_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> - a - x b - dy
-
ggml_silu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sin(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sin_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_soft_max(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_soft_max_ext(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> mask, double scale, double max_bias) → Pointer<ggml_tensor> - fused soft_max(a*scale + mask*(ALiBi slope)); mask is optional; max_bias = 0.0f for no ALiBi
-
ggml_soft_max_ext_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double scale, double max_bias) → Pointer<ggml_tensor> -
ggml_soft_max_ext_back_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double scale, double max_bias) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_soft_max_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - in-place, returns view(a)
-
ggml_sqr(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sqr_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sqrt(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sqrt_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_ssm_conv(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> sx, Pointer<ggml_tensor> c) → Pointer<ggml_tensor> -
ggml_ssm_scan(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> s, Pointer<ggml_tensor> x, Pointer<ggml_tensor> dt, Pointer<ggml_tensor> A, Pointer<ggml_tensor> B, Pointer<ggml_tensor> C) → Pointer<ggml_tensor> -
ggml_status_to_string(
int status) → Pointer< Char> - get ggml_status name string
-
ggml_step(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_step_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sub(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_sub_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_sum(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - return scalar
-
ggml_sum_rows(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
sums along rows, with input shape [a,b,c,d], return shape [1,b,c,d]
-
ggml_tanh(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_tanh_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_tensor_overhead(
) → int - use this to compute the memory overhead of a tensor
-
ggml_threadpool_free(
Pointer< ggml_threadpool> threadpool) → void -
ggml_threadpool_get_n_threads(
Pointer< ggml_threadpool> threadpool) → int -
ggml_threadpool_new(
Pointer< ggml_threadpool_params> params) → Pointer<ggml_threadpool> -
ggml_threadpool_params_default(
int n_threads) → ggml_threadpool_params -
ggml_threadpool_params_init(
Pointer< ggml_threadpool_params> p, int n_threads) → void -
ggml_threadpool_params_match(
Pointer< ggml_threadpool_params> p0, Pointer<ggml_threadpool_params> p1) → bool -
ggml_threadpool_pause(
Pointer< ggml_threadpool> threadpool) → void -
ggml_threadpool_resume(
Pointer< ggml_threadpool> threadpool) → void -
ggml_time_init(
) → void - misc
-
ggml_time_ms(
) → int -
ggml_time_us(
) → int -
ggml_timestep_embedding(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> timesteps, int dim, int max_period) → Pointer<ggml_tensor> -
Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
timesteps: [N,]
return: [N, dim]
-
ggml_top_k(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int k) → Pointer<ggml_tensor> - top k elements per row
-
ggml_transpose(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> - alias for ggml_permute(ctx, a, 1, 0, 2, 3)
-
ggml_type_name(
int type) → Pointer< Char> -
ggml_type_size(
int type) → int -
ggml_type_sizef(
int type) → double -
ggml_unary(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int op) → Pointer<ggml_tensor> -
ggml_unary_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int op) → Pointer<ggml_tensor> -
ggml_unary_op_name(
int op) → Pointer< Char> -
ggml_unravel_index(
Pointer< ggml_tensor> tensor, int i, Pointer<Int64> i0, Pointer<Int64> i1, Pointer<Int64> i2, Pointer<Int64> i3) → void - Converts a flat index into coordinates
-
ggml_upscale(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int scale_factor) → Pointer<ggml_tensor> - nearest interpolate; multiplies ne0 and ne1 by scale factor; used in stable-diffusion
-
ggml_upscale_ext(
Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> - nearest interpolate to specified dimensions; used in tortoise.cpp
-
ggml_used_mem(
Pointer< ggml_context> ctx) → int -
ggml_validate_row_data(
int type, Pointer< Void> data, int nbytes) → bool -
ggml_view_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int offset) → Pointer<ggml_tensor> - offset in bytes
-
ggml_view_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int nb1, int offset) → Pointer<ggml_tensor> -
ggml_view_3d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int nb1, int nb2, int offset) → Pointer<ggml_tensor> -
ggml_view_4d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> -
ggml_view_tensor(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> src) → Pointer<ggml_tensor> -
ggml_win_part(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int w) → Pointer<ggml_tensor> - partition into non-overlapping windows with padding if needed example: a: 768 64 64 1 w: 14 res: 768 14 14 25 used in sam
-
ggml_win_unpart(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int w0, int h0, int w) → Pointer<ggml_tensor> - reverse of ggml_win_part used in sam
-
noSuchMethod(
Invocation invocation) → dynamic -
Invoked when a nonexistent method or property is accessed.
inherited
-
toString(
) → String -
A string representation of this object.
inherited
-
whisper_bench_ggml_mul_mat(
int n_threads) → int -
whisper_bench_ggml_mul_mat_str(
int n_threads) → Pointer< Char> -
whisper_bench_memcpy(
int n_threads) → int - Temporary helpers needed for exposing ggml interface
-
whisper_bench_memcpy_str(
int n_threads) → Pointer< Char> -
whisper_context_default_params(
) → whisper_context_params -
whisper_context_default_params_by_ref(
) → Pointer< whisper_context_params> - NOTE: this function allocates memory, and it is the responsibility of the caller to free the pointer - see whisper_free_context_params & whisper_free_params()
-
whisper_ctx_init_openvino_encoder(
Pointer< whisper_context> ctx, Pointer<Char> model_path, Pointer<Char> device, Pointer<Char> cache_dir) → int -
whisper_ctx_init_openvino_encoder_with_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, Pointer<Char> model_path, Pointer<Char> device, Pointer<Char> cache_dir) → int - Given a context, enable use of OpenVINO for encode inference. model_path: Optional path to OpenVINO encoder IR model. If set to nullptr, the path will be generated from the ggml model path that was passed in to whisper_init_from_file. For example, if 'path_model' was "/path/to/ggml-base.en.bin", then OpenVINO IR model path will be assumed to be "/path/to/ggml-base.en-encoder-openvino.xml". device: OpenVINO device to run inference on ("CPU", "GPU", etc.) cache_dir: Optional cache directory that can speed up init time, especially for GPU, by caching compiled 'blobs' there. Set to nullptr if not used. Returns 0 on success. If OpenVINO is not enabled in build, this simply returns 1.
-
whisper_decode(
Pointer<whisper_context> ctx, Pointer<whisper_token> tokens, int n_tokens, int n_past, int n_threads) → int - Run the Whisper decoder to obtain the logits and probabilities for the next token. Make sure to call whisper_encode() first. tokens + n_tokens is the provided context for the decoder. n_past is the number of tokens to use from previous decoder calls. Returns 0 on success. TODO: add support for multiple decoders
-
whisper_decode_with_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, Pointer<whisper_token> tokens, int n_tokens, int n_past, int n_threads) → int -
whisper_encode(
Pointer< whisper_context> ctx, int offset, int n_threads) → int - Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context. Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. offset can be used to specify the offset of the first frame in the spectrogram. Returns 0 on success
-
whisper_encode_with_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, int offset, int n_threads) → int -
whisper_free(
Pointer< whisper_context> ctx) → void - Frees all allocated memory
-
whisper_free_context_params(
Pointer< whisper_context_params> params) → void -
whisper_free_params(
Pointer< whisper_full_params> params) → void -
whisper_free_state(
Pointer< whisper_state> state) → void -
whisper_full(
Pointer<whisper_context> ctx, whisper_full_params params, Pointer<Float> samples, int n_samples) → int - Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text. Not thread safe for the same context. Uses the specified decoding strategy to obtain the text.
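A hedged sketch of the full pipeline described above; the model path and the source of the 16 kHz float PCM samples are up to the caller, and strategy 0 is assumed to correspond to WHISPER_SAMPLING_GREEDY:

  import 'dart:ffi';
  import 'package:ffi/ffi.dart';

  void transcribe(WhisperDartBindings bindings, String modelPath,
      Pointer<Float> samples, int nSamples) {
    final pathC = modelPath.toNativeUtf8();
    final cparams = bindings.whisper_context_default_params();
    final ctx = bindings.whisper_init_from_file_with_params(pathC.cast(), cparams);
    malloc.free(pathC);
    if (ctx == nullptr) {
      throw StateError('failed to load model');
    }

    final fparams = bindings.whisper_full_default_params(0 /* greedy */);
    if (bindings.whisper_full(ctx, fparams, samples, nSamples) == 0) {
      final n = bindings.whisper_full_n_segments(ctx);
      for (var i = 0; i < n; i++) {
        final t0 = bindings.whisper_full_get_segment_t0(ctx, i);
        final t1 = bindings.whisper_full_get_segment_t1(ctx, i);
        final text = bindings.whisper_full_get_segment_text(ctx, i);
        print('[$t0 -> $t1] ${text.cast<Utf8>().toDartString()}');
      }
    }
    bindings.whisper_free(ctx);
  }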
-
whisper_full_default_params(
int strategy) → whisper_full_params -
whisper_full_default_params_by_ref(
int strategy) → Pointer< whisper_full_params> -
whisper_full_get_segment_no_speech_prob(
Pointer< whisper_context> ctx, int i_segment) → double - Get the no_speech probability for the specified segment
-
whisper_full_get_segment_no_speech_prob_from_state(
Pointer< whisper_state> state, int i_segment) → double -
whisper_full_get_segment_speaker_turn_next(
Pointer< whisper_context> ctx, int i_segment) → bool - Get whether the next segment is predicted as a speaker turn
-
whisper_full_get_segment_speaker_turn_next_from_state(
Pointer< whisper_state> state, int i_segment) → bool -
whisper_full_get_segment_t0(
Pointer< whisper_context> ctx, int i_segment) → int - Get the start and end time of the specified segment
-
whisper_full_get_segment_t0_from_state(
Pointer< whisper_state> state, int i_segment) → int -
whisper_full_get_segment_t1(
Pointer< whisper_context> ctx, int i_segment) → int -
whisper_full_get_segment_t1_from_state(
Pointer< whisper_state> state, int i_segment) → int -
whisper_full_get_segment_text(
Pointer< whisper_context> ctx, int i_segment) → Pointer<Char> - Get the text of the specified segment
-
whisper_full_get_segment_text_from_state(
Pointer< whisper_state> state, int i_segment) → Pointer<Char> -
whisper_full_get_token_data(
Pointer<whisper_context> ctx, int i_segment, int i_token) → whisper_token_data - Get token data for the specified token in the specified segment. This contains probabilities, timestamps, etc.
-
whisper_full_get_token_data_from_state(
Pointer< whisper_state> state, int i_segment, int i_token) → whisper_token_data -
whisper_full_get_token_id(
Pointer< whisper_context> ctx, int i_segment, int i_token) → int -
whisper_full_get_token_id_from_state(
Pointer< whisper_state> state, int i_segment, int i_token) → int -
whisper_full_get_token_p(
Pointer< whisper_context> ctx, int i_segment, int i_token) → double - Get the probability of the specified token in the specified segment
-
whisper_full_get_token_p_from_state(
Pointer< whisper_state> state, int i_segment, int i_token) → double -
whisper_full_get_token_text(
Pointer< whisper_context> ctx, int i_segment, int i_token) → Pointer<Char> - Get the token text of the specified token in the specified segment
-
whisper_full_get_token_text_from_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, int i_segment, int i_token) → Pointer<Char> -
whisper_full_lang_id(
Pointer< whisper_context> ctx) → int - Language id associated with the context's default state
-
whisper_full_lang_id_from_state(
Pointer< whisper_state> state) → int - Language id associated with the provided state
-
whisper_full_n_segments(
Pointer< whisper_context> ctx) → int - Number of generated text segments. A segment can be a few words, a sentence, or even a paragraph.
-
whisper_full_n_segments_from_state(
Pointer< whisper_state> state) → int -
whisper_full_n_tokens(
Pointer< whisper_context> ctx, int i_segment) → int - Get number of tokens in the specified segment
-
whisper_full_n_tokens_from_state(
Pointer< whisper_state> state, int i_segment) → int -
whisper_full_parallel(
Pointer< whisper_context> ctx, whisper_full_params params, Pointer<Float> samples, int n_samples, int n_processors) → int - Split the input audio into chunks and process each chunk separately using whisper_full_with_state(). The result is stored in the default state of the context. Not thread safe if executed in parallel on the same context. This approach can offer some speedup in some cases, but transcription accuracy can be worse at the beginning and end of each chunk. A usage sketch follows below.
-
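A sketch of the parallel variant. It mirrors the whisper_full sketch above except that the audio is split across a given number of processors; the strategy value 0 and the paths are again assumptions.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';
// Generated bindings import path is an assumption; adjust to your project.
import 'whisper4dart_bindings.dart';

void main() {
  final bindings = WhisperDartBindings(DynamicLibrary.open('libwhisper.so'));
  final modelPath = 'ggml-base.en.bin'.toNativeUtf8().cast<Char>();
  final ctx = bindings.whisper_init_from_file(modelPath);
  if (ctx == nullptr) throw StateError('failed to load model');

  final nSamples = 16000 * 60; // placeholder: one minute of 16 kHz mono PCM
  final samples = calloc<Float>(nSamples);

  final params = bindings.whisper_full_default_params(0); // assumed greedy
  // Process the audio in chunks on 2 processors; results land in the
  // context's default state, so the usual getters can be used afterwards.
  if (bindings.whisper_full_parallel(ctx, params, samples, nSamples, 2) != 0) {
    throw StateError('whisper_full_parallel failed');
  }
  print('segments: ${bindings.whisper_full_n_segments(ctx)}');

  calloc.free(samples);
  malloc.free(modelPath);
  bindings.whisper_free(ctx);
}
```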
whisper_full_with_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, whisper_full_params params, Pointer<Float> samples, int n_samples) → int -
whisper_get_logits(
Pointer< whisper_context> ctx) → Pointer<Float> - Token logits obtained from the last call to whisper_decode(). The logits for the last token are stored in the last row. Rows: n_tokens, Cols: n_vocab. See the sketch below.
-
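A sketch of reading the logits matrix after a decode step. It assumes the rows correspond to the tokens passed to the last whisper_decode() call and that the buffer is laid out row-major, as the description above suggests.

```dart
import 'dart:ffi';
// Generated bindings import path is an assumption; adjust to your project.
import 'whisper4dart_bindings.dart';

/// Returns the logits row for the last decoded token as a Dart list.
/// Assumes whisper_decode() was just called with `nTokens` tokens.
List<double> lastTokenLogits(
    WhisperDartBindings bindings, Pointer<whisper_context> ctx, int nTokens) {
  final nVocab = bindings.whisper_n_vocab(ctx);
  final logits = bindings.whisper_get_logits(ctx);
  // Row-major layout: n_tokens rows of n_vocab columns; take the last row.
  final all = logits.asTypedList(nTokens * nVocab);
  return all.sublist((nTokens - 1) * nVocab).toList();
}
```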
whisper_get_logits_from_state(
Pointer< whisper_state> state) → Pointer<Float> -
whisper_get_timings(
Pointer< whisper_context> ctx) → Pointer<whisper_timings> -
whisper_init(
Pointer< whisper_model_loader> loader) → Pointer<whisper_context> -
whisper_init_from_buffer(
Pointer< Void> buffer, int buffer_size) → Pointer<whisper_context> -
whisper_init_from_buffer_no_state(
Pointer< Void> buffer, int buffer_size) → Pointer<whisper_context> -
whisper_init_from_buffer_with_params(
Pointer< Void> buffer, int buffer_size, whisper_context_params params) → Pointer<whisper_context> -
whisper_init_from_buffer_with_params_no_state(
Pointer< Void> buffer, int buffer_size, whisper_context_params params) → Pointer<whisper_context> -
whisper_init_from_file(
Pointer< Char> path_model) → Pointer<whisper_context> -
whisper_init_from_file_no_state(
Pointer< Char> path_model) → Pointer<whisper_context> -
whisper_init_from_file_with_params(
Pointer< Char> path_model, whisper_context_params params) → Pointer<whisper_context> - Various functions for loading a ggml whisper model. Allocates (almost) all memory needed for the model. Returns NULL on failure.
-
whisper_init_from_file_with_params_no_state(
Pointer< Char> path_model, whisper_context_params params) → Pointer<whisper_context> - These are the same as the above, but the internal state of the context is not allocated automatically. It is the responsibility of the caller to allocate the state using whisper_init_state() (#523). A usage sketch follows below.
-
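A sketch of the no_state workflow: the context is loaded without a default state, a state is allocated explicitly with whisper_init_state(), and the _with_state / _from_state variants are used. The greedy-strategy value 0 and the file/library paths are assumptions.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';
// Generated bindings import path is an assumption; adjust to your project.
import 'whisper4dart_bindings.dart';

void main() {
  final bindings = WhisperDartBindings(DynamicLibrary.open('libwhisper.so'));
  final modelPath = 'ggml-base.en.bin'.toNativeUtf8().cast<Char>();

  // Load the model weights only; no internal state is allocated (#523).
  final ctx = bindings.whisper_init_from_file_no_state(modelPath);
  if (ctx == nullptr) throw StateError('failed to load model');

  // Allocate a state explicitly; one context can serve several states.
  final state = bindings.whisper_init_state(ctx);
  if (state == nullptr) throw StateError('failed to init state');

  final nSamples = 16000;
  final samples = calloc<Float>(nSamples); // placeholder 16 kHz mono PCM
  final params = bindings.whisper_full_default_params(0); // assumed greedy

  if (bindings.whisper_full_with_state(ctx, state, params, samples, nSamples) == 0) {
    final n = bindings.whisper_full_n_segments_from_state(state);
    for (var i = 0; i < n; i++) {
      print(bindings
          .whisper_full_get_segment_text_from_state(state, i)
          .cast<Utf8>()
          .toDartString());
    }
  }

  calloc.free(samples);
  malloc.free(modelPath);
  bindings.whisper_free_state(state);
  bindings.whisper_free(ctx);
}
```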
whisper_init_no_state(
Pointer< whisper_model_loader> loader) → Pointer<whisper_context> -
whisper_init_state(
Pointer< whisper_context> ctx) → Pointer<whisper_state> -
whisper_init_with_params(
Pointer< whisper_model_loader> loader, whisper_context_params params) → Pointer<whisper_context> -
whisper_init_with_params_no_state(
Pointer< whisper_model_loader> loader, whisper_context_params params) → Pointer<whisper_context> -
whisper_is_multilingual(
Pointer< whisper_context> ctx) → int -
whisper_lang_auto_detect(
Pointer< whisper_context> ctx, int offset_ms, int n_threads, Pointer<Float> lang_probs) → int - Use mel data at offset_ms to try to auto-detect the spoken language. Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. Returns the top language id, or a negative value on failure. If lang_probs is not null, it is filled with the probabilities of all languages; the array must be whisper_lang_max_id() + 1 in size. ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69 See the sketch below for typical usage.
-
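A sketch of language auto-detection. Note the lang_probs buffer sized whisper_lang_max_id() + 1 and that the mel spectrogram must already be populated (here via whisper_pcm_to_mel()); the thread count and helper name are placeholders.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';
// Generated bindings import path is an assumption; adjust to your project.
import 'whisper4dart_bindings.dart';

/// Detects the spoken language; assumes `ctx` already holds a loaded model
/// and `samples` holds 16 kHz mono float PCM.
void detectLanguage(WhisperDartBindings bindings, Pointer<whisper_context> ctx,
    Pointer<Float> samples, int nSamples) {
  if (bindings.whisper_pcm_to_mel(ctx, samples, nSamples, 4) != 0) {
    throw StateError('whisper_pcm_to_mel failed');
  }

  // The probability array must have whisper_lang_max_id() + 1 entries.
  final nLangs = bindings.whisper_lang_max_id() + 1;
  final probs = calloc<Float>(nLangs);
  final langId =
      bindings.whisper_lang_auto_detect(ctx, /*offset_ms*/ 0, /*n_threads*/ 4, probs);
  if (langId < 0) {
    calloc.free(probs);
    throw StateError('language detection failed');
  }

  final lang = bindings.whisper_lang_str(langId).cast<Utf8>().toDartString();
  print('detected "$lang" with p=${probs[langId]}');
  calloc.free(probs);
}
```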
whisper_lang_auto_detect_with_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, int offset_ms, int n_threads, Pointer<Float> lang_probs) → int -
whisper_lang_id(
Pointer< Char> lang) → int - Return the id of the specified language; returns -1 if not found. Examples: "de" -> 2, "german" -> 2
-
whisper_lang_max_id(
) → int - Largest language id (i.e. number of available languages - 1)
-
whisper_lang_str(
int id) → Pointer< Char> - Return the short string of the specified language id (e.g. 2 -> "de"), returns nullptr if not found
-
whisper_lang_str_full(
int id) → Pointer< Char> - Return the full name string of the specified language id (e.g. 2 -> "german"), returns nullptr if not found. See the sketch below.
-
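A small sketch of the language helpers, mirroring the examples in the descriptions above; the library path is a placeholder.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';
// Generated bindings import path is an assumption; adjust to your project.
import 'whisper4dart_bindings.dart';

void main() {
  // Library path is a placeholder, as in the earlier sketches.
  final bindings = WhisperDartBindings(DynamicLibrary.open('libwhisper.so'));

  final de = 'de'.toNativeUtf8().cast<Char>();
  final id = bindings.whisper_lang_id(de); // expected: 2, or -1 if not found
  malloc.free(de);

  if (id >= 0) {
    print(bindings.whisper_lang_str(id).cast<Utf8>().toDartString()); // "de"
    print(bindings.whisper_lang_str_full(id).cast<Utf8>().toDartString()); // "german"
  }
  print('languages available: ${bindings.whisper_lang_max_id() + 1}');
}
```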
whisper_log_set(
int log_callback, Pointer< Void> user_data) → void - Control logging output; default behavior is to print to stderr
-
whisper_model_ftype(
Pointer< whisper_context> ctx) → int -
whisper_model_n_audio_ctx(
Pointer< whisper_context> ctx) → int -
whisper_model_n_audio_head(
Pointer< whisper_context> ctx) → int -
whisper_model_n_audio_layer(
Pointer< whisper_context> ctx) → int -
whisper_model_n_audio_state(
Pointer< whisper_context> ctx) → int -
whisper_model_n_mels(
Pointer< whisper_context> ctx) → int -
whisper_model_n_text_ctx(
Pointer< whisper_context> ctx) → int -
whisper_model_n_text_head(
Pointer< whisper_context> ctx) → int -
whisper_model_n_text_layer(
Pointer< whisper_context> ctx) → int -
whisper_model_n_text_state(
Pointer< whisper_context> ctx) → int -
whisper_model_n_vocab(
Pointer< whisper_context> ctx) → int -
whisper_model_type(
Pointer< whisper_context> ctx) → int -
whisper_model_type_readable(
Pointer< whisper_context> ctx) → Pointer<Char> -
whisper_n_audio_ctx(
Pointer< whisper_context> ctx) → int -
whisper_n_len(
Pointer< whisper_context> ctx) → int -
whisper_n_len_from_state(
Pointer< whisper_state> state) → int -
whisper_n_text_ctx(
Pointer< whisper_context> ctx) → int -
whisper_n_vocab(
Pointer< whisper_context> ctx) → int -
whisper_pcm_to_mel(
Pointer< whisper_context> ctx, Pointer<Float> samples, int n_samples, int n_threads) → int - Convert RAW PCM audio to log mel spectrogram. The resulting spectrogram is stored inside the default state of the provided whisper context. Returns 0 on success
-
whisper_pcm_to_mel_with_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, Pointer<Float> samples, int n_samples, int n_threads) → int -
whisper_print_system_info(
) → Pointer< Char> - Print system information
-
whisper_print_timings(
Pointer< whisper_context> ctx) → void -
whisper_reset_timings(
Pointer< whisper_context> ctx) → void -
whisper_set_mel(
Pointer< whisper_context> ctx, Pointer<Float> data, int n_len, int n_mel) → int - This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context. Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram. n_mel must be 80. Returns 0 on success. See the sketch below.
-
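A sketch of supplying a precomputed log mel spectrogram. It assumes the buffer holds n_len frames of 80 mel bins and that the library copies the data during whisper_set_mel(), so the Dart-side buffer can be freed afterwards; both points are assumptions about layout and ownership.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';
// Generated bindings import path is an assumption; adjust to your project.
import 'whisper4dart_bindings.dart';

/// Feeds a custom log mel spectrogram into the context's default state.
/// Assumes `ctx` holds a loaded model and `mel` has nLen * 80 values.
void setCustomMel(WhisperDartBindings bindings, Pointer<whisper_context> ctx,
    List<double> mel, int nLen) {
  const nMel = 80; // required by whisper_set_mel()
  assert(mel.length == nLen * nMel);

  final buf = calloc<Float>(mel.length);
  buf.asTypedList(mel.length).setAll(0, mel);
  final rc = bindings.whisper_set_mel(ctx, buf, nLen, nMel);
  // Freed on the assumption that the library copies the buffer internally.
  calloc.free(buf);
  if (rc != 0) throw StateError('whisper_set_mel failed');
}
```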
whisper_set_mel_with_state(
Pointer< whisper_context> ctx, Pointer<whisper_state> state, Pointer<Float> data, int n_len, int n_mel) → int -
whisper_token_beg(
Pointer< whisper_context> ctx) → int -
whisper_token_count(
Pointer< whisper_context> ctx, Pointer<Char> text) → int - Return the number of tokens in the provided text. Equivalent to: -whisper_tokenize(ctx, text, NULL, 0). See the sketch below.
-
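A sketch that sizes the token buffer with whisper_token_count() and then calls whisper_tokenize(). It assumes whisper_token maps to a 32-bit integer in the generated bindings; the helper name is a placeholder.

```dart
import 'dart:ffi';
import 'package:ffi/ffi.dart';
// Generated bindings import path is an assumption; adjust to your project.
import 'whisper4dart_bindings.dart';

/// Tokenizes `text` with the vocabulary of the model loaded in `ctx`.
List<int> tokenize(
    WhisperDartBindings bindings, Pointer<whisper_context> ctx, String text) {
  final cText = text.toNativeUtf8().cast<Char>();

  // Ask for the count first, then allocate exactly that many tokens.
  final count = bindings.whisper_token_count(ctx, cText);
  final tokens = calloc<Int32>(count); // assumes whisper_token == int32_t
  final written = bindings.whisper_tokenize(ctx, cText, tokens.cast(), count);
  if (written < 0) {
    calloc.free(tokens);
    malloc.free(cText);
    throw StateError('tokenization failed');
  }

  final result = List<int>.generate(written, (i) => tokens[i]);
  calloc.free(tokens);
  malloc.free(cText);
  return result;
}
```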
whisper_token_eot(
Pointer< whisper_context> ctx) → int - Special tokens
-
whisper_token_lang(
Pointer< whisper_context> ctx, int lang_id) → int -
whisper_token_nosp(
Pointer< whisper_context> ctx) → int -
whisper_token_not(
Pointer< whisper_context> ctx) → int -
whisper_token_prev(
Pointer< whisper_context> ctx) → int -
whisper_token_solm(
Pointer< whisper_context> ctx) → int -
whisper_token_sot(
Pointer< whisper_context> ctx) → int -
whisper_token_to_str(
Pointer< whisper_context> ctx, int token) → Pointer<Char> - Token Id -> String. Uses the vocabulary in the provided context
-
whisper_token_transcribe(
Pointer< whisper_context> ctx) → int -
whisper_token_translate(
Pointer< whisper_context> ctx) → int - Task tokens
-
whisper_tokenize(
Pointer< whisper_context> ctx, Pointer<Char> text, Pointer<whisper_token> tokens, int n_max_tokens) → int - Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens. Returns the number of tokens on success, no more than n_max_tokens. Returns a negative number on failure - the number of tokens that would have been returned. TODO: not sure if correct
-
Operators
operator ==(
Object other) → bool -
The equality operator.
inherited