Spaces:
Running
Running
hsinhoyeh
committed on
go : add beamsize/entropythold/maxcontext to context interface (#2350)
Browse files
* feat(go binding): add beamsize/entropythold/maxcontext to context interface
fixes: #2349
* fix go binding build
* fix dynamic link .so and header.h
* remove LD_LIBRARY_PATH
* remove ggml obj from whisper dynamic lib
* drop LIB_GGML
- Makefile +2 -1
- bindings/go/Makefile +1 -1
- bindings/go/params.go +14 -0
- bindings/go/pkg/whisper/context.go +15 -0
- bindings/go/pkg/whisper/interface.go +3 -0
- bindings/go/whisper.go +1 -1
Makefile
CHANGED
|
@@ -971,7 +971,8 @@ $(LIB_WHISPER): \
|
|
| 971 |
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 972 |
|
| 973 |
$(LIB_WHISPER_S): \
|
| 974 |
-
$(OBJ_WHISPER)
|
|
|
|
| 975 |
ar rcs $(LIB_WHISPER_S) $^
|
| 976 |
|
| 977 |
# common
|
|
|
|
| 971 |
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
| 972 |
|
| 973 |
$(LIB_WHISPER_S): \
|
| 974 |
+
$(OBJ_WHISPER) \
|
| 975 |
+
$(OBJ_GGML)
|
| 976 |
ar rcs $(LIB_WHISPER_S) $^
|
| 977 |
|
| 978 |
# common
|
bindings/go/Makefile
CHANGED
|
@@ -14,7 +14,7 @@ GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
|
| 14 |
BUILD_DIR := build
|
| 15 |
MODELS_DIR := models
|
| 16 |
EXAMPLES_DIR := $(wildcard examples/*)
|
| 17 |
-
INCLUDE_PATH := $(abspath ../..)
|
| 18 |
LIBRARY_PATH := $(abspath ../..)
|
| 19 |
|
| 20 |
ifeq ($(UNAME_S),Darwin)
|
|
|
|
| 14 |
BUILD_DIR := build
|
| 15 |
MODELS_DIR := models
|
| 16 |
EXAMPLES_DIR := $(wildcard examples/*)
|
| 17 |
+
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
| 18 |
LIBRARY_PATH := $(abspath ../..)
|
| 19 |
|
| 20 |
ifeq ($(UNAME_S),Darwin)
|
bindings/go/params.go
CHANGED
|
@@ -119,6 +119,18 @@ func (p *Params) SetAudioCtx(n int) {
|
|
| 119 |
p.audio_ctx = C.int(n)
|
| 120 |
}
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
// Set initial prompt
|
| 123 |
func (p *Params) SetInitialPrompt(prompt string) {
|
| 124 |
p.initial_prompt = C.CString(prompt)
|
|
@@ -149,6 +161,8 @@ func (p *Params) String() string {
|
|
| 149 |
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
| 150 |
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
| 151 |
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
|
|
|
|
|
|
| 152 |
if p.translate {
|
| 153 |
str += " translate"
|
| 154 |
}
|
|
|
|
| 119 |
p.audio_ctx = C.int(n)
|
| 120 |
}
|
| 121 |
|
| 122 |
+
func (p *Params) SetMaxContext(n int) {
|
| 123 |
+
p.n_max_text_ctx = C.int(n)
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
func (p *Params) SetBeamSize(n int) {
|
| 127 |
+
p.beam_search.beam_size = C.int(n)
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
func (p *Params) SetEntropyThold(t float32) {
|
| 131 |
+
p.entropy_thold = C.float(t)
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
// Set initial prompt
|
| 135 |
func (p *Params) SetInitialPrompt(prompt string) {
|
| 136 |
p.initial_prompt = C.CString(prompt)
|
|
|
|
| 161 |
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
| 162 |
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
| 163 |
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
| 164 |
+
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
|
| 165 |
+
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
|
| 166 |
if p.translate {
|
| 167 |
str += " translate"
|
| 168 |
}
|
bindings/go/pkg/whisper/context.go
CHANGED
|
@@ -125,6 +125,21 @@ func (context *context) SetAudioCtx(n uint) {
|
|
| 125 |
context.params.SetAudioCtx(int(n))
|
| 126 |
}
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
// Set initial prompt
|
| 129 |
func (context *context) SetInitialPrompt(prompt string) {
|
| 130 |
context.params.SetInitialPrompt(prompt)
|
|
|
|
| 125 |
context.params.SetAudioCtx(int(n))
|
| 126 |
}
|
| 127 |
|
| 128 |
+
// Set maximum number of text context tokens to store
|
| 129 |
+
func (context *context) SetMaxContext(n int) {
|
| 130 |
+
context.params.SetMaxContext(n)
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
// Set Beam Size
|
| 134 |
+
func (context *context) SetBeamSize(n int) {
|
| 135 |
+
context.params.SetBeamSize(n)
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
// Set Entropy threshold
|
| 139 |
+
func (context *context) SetEntropyThold(t float32) {
|
| 140 |
+
context.params.SetEntropyThold(t)
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
// Set initial prompt
|
| 144 |
func (context *context) SetInitialPrompt(prompt string) {
|
| 145 |
context.params.SetInitialPrompt(prompt)
|
bindings/go/pkg/whisper/interface.go
CHANGED
|
@@ -48,6 +48,9 @@ type Context interface {
|
|
| 48 |
SetTokenTimestamps(bool) // Set token timestamps flag
|
| 49 |
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
| 50 |
SetAudioCtx(uint) // Set audio encoder context
|
|
|
|
|
|
|
|
|
|
| 51 |
SetInitialPrompt(prompt string) // Set initial prompt
|
| 52 |
|
| 53 |
// Process mono audio data and return any errors.
|
|
|
|
| 48 |
SetTokenTimestamps(bool) // Set token timestamps flag
|
| 49 |
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
| 50 |
SetAudioCtx(uint) // Set audio encoder context
|
| 51 |
+
SetMaxContext(n int) // Set maximum number of text context tokens to store
|
| 52 |
+
SetBeamSize(n int) // Set Beam Size
|
| 53 |
+
SetEntropyThold(t float32) // Set Entropy threshold
|
| 54 |
SetInitialPrompt(prompt string) // Set initial prompt
|
| 55 |
|
| 56 |
// Process mono audio data and return any errors.
|
bindings/go/whisper.go
CHANGED
|
@@ -9,7 +9,7 @@ import (
|
|
| 9 |
// CGO
|
| 10 |
|
| 11 |
/*
|
| 12 |
-
#cgo LDFLAGS: -lwhisper -lm -lstdc++
|
| 13 |
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
| 14 |
#include <whisper.h>
|
| 15 |
#include <stdlib.h>
|
|
|
|
| 9 |
// CGO
|
| 10 |
|
| 11 |
/*
|
| 12 |
+
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
|
| 13 |
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
| 14 |
#include <whisper.h>
|
| 15 |
#include <stdlib.h>
|