hsinhoyeh committed on
Commit
7efcda7
·
unverified ·
1 Parent(s): 4493ffd

go : add beamsize/entropythold/maxcontext to context interface (#2350)

Browse files

* feat(go binding): add beamsize/entropythold/maxcontext to context interface

fixes: #2349

* fix go bindings build

* fix dynamic linking of the .so and the header (.h) include path

* remove LD_LIBRARY_PATH

* remove ggml obj from whisper dynamic lib

* drop LIB_GGML

Makefile CHANGED
@@ -971,7 +971,8 @@ $(LIB_WHISPER): \
971
  $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
972
 
973
  $(LIB_WHISPER_S): \
974
- $(OBJ_WHISPER)
 
975
  ar rcs $(LIB_WHISPER_S) $^
976
 
977
  # common
 
971
  $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
972
 
973
  $(LIB_WHISPER_S): \
974
+ $(OBJ_WHISPER) \
975
+ $(OBJ_GGML)
976
  ar rcs $(LIB_WHISPER_S) $^
977
 
978
  # common
bindings/go/Makefile CHANGED
@@ -14,7 +14,7 @@ GGML_METAL_PATH_RESOURCES := $(abspath ../..)
14
  BUILD_DIR := build
15
  MODELS_DIR := models
16
  EXAMPLES_DIR := $(wildcard examples/*)
17
- INCLUDE_PATH := $(abspath ../..)
18
  LIBRARY_PATH := $(abspath ../..)
19
 
20
  ifeq ($(UNAME_S),Darwin)
 
14
  BUILD_DIR := build
15
  MODELS_DIR := models
16
  EXAMPLES_DIR := $(wildcard examples/*)
17
+ INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
18
  LIBRARY_PATH := $(abspath ../..)
19
 
20
  ifeq ($(UNAME_S),Darwin)
bindings/go/params.go CHANGED
@@ -119,6 +119,18 @@ func (p *Params) SetAudioCtx(n int) {
119
  p.audio_ctx = C.int(n)
120
  }
121
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  // Set initial prompt
123
  func (p *Params) SetInitialPrompt(prompt string) {
124
  p.initial_prompt = C.CString(prompt)
@@ -149,6 +161,8 @@ func (p *Params) String() string {
149
  str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
150
  str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
151
  str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
 
 
152
  if p.translate {
153
  str += " translate"
154
  }
 
119
  p.audio_ctx = C.int(n)
120
  }
121
 
122
+ func (p *Params) SetMaxContext(n int) {
123
+ p.n_max_text_ctx = C.int(n)
124
+ }
125
+
126
+ func (p *Params) SetBeamSize(n int) {
127
+ p.beam_search.beam_size = C.int(n)
128
+ }
129
+
130
+ func (p *Params) SetEntropyThold(t float32) {
131
+ p.entropy_thold = C.float(t)
132
+ }
133
+
134
  // Set initial prompt
135
  func (p *Params) SetInitialPrompt(prompt string) {
136
  p.initial_prompt = C.CString(prompt)
 
161
  str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
162
  str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
163
  str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
164
+ str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
165
+ str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
166
  if p.translate {
167
  str += " translate"
168
  }
bindings/go/pkg/whisper/context.go CHANGED
@@ -125,6 +125,21 @@ func (context *context) SetAudioCtx(n uint) {
125
  context.params.SetAudioCtx(int(n))
126
  }
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  // Set initial prompt
129
  func (context *context) SetInitialPrompt(prompt string) {
130
  context.params.SetInitialPrompt(prompt)
 
125
  context.params.SetAudioCtx(int(n))
126
  }
127
 
128
+ // Set maximum number of text context tokens to store
129
+ func (context *context) SetMaxContext(n int) {
130
+ context.params.SetMaxContext(n)
131
+ }
132
+
133
+ // Set Beam Size
134
+ func (context *context) SetBeamSize(n int) {
135
+ context.params.SetBeamSize(n)
136
+ }
137
+
138
+ // Set Entropy threshold
139
+ func (context *context) SetEntropyThold(t float32) {
140
+ context.params.SetEntropyThold(t)
141
+ }
142
+
143
  // Set initial prompt
144
  func (context *context) SetInitialPrompt(prompt string) {
145
  context.params.SetInitialPrompt(prompt)
bindings/go/pkg/whisper/interface.go CHANGED
@@ -48,6 +48,9 @@ type Context interface {
48
  SetTokenTimestamps(bool) // Set token timestamps flag
49
  SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
50
  SetAudioCtx(uint) // Set audio encoder context
 
 
 
51
  SetInitialPrompt(prompt string) // Set initial prompt
52
 
53
  // Process mono audio data and return any errors.
 
48
  SetTokenTimestamps(bool) // Set token timestamps flag
49
  SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
50
  SetAudioCtx(uint) // Set audio encoder context
51
+ SetMaxContext(n int) // Set maximum number of text context tokens to store
52
+ SetBeamSize(n int) // Set Beam Size
53
+ SetEntropyThold(t float32) // Set Entropy threshold
54
  SetInitialPrompt(prompt string) // Set initial prompt
55
 
56
  // Process mono audio data and return any errors.
bindings/go/whisper.go CHANGED
@@ -9,7 +9,7 @@ import (
9
  // CGO
10
 
11
  /*
12
- #cgo LDFLAGS: -lwhisper -lm -lstdc++
13
  #cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
14
  #include <whisper.h>
15
  #include <stdlib.h>
 
9
  // CGO
10
 
11
  /*
12
+ #cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
13
  #cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
14
  #include <whisper.h>
15
  #include <stdlib.h>