from langchain.docstore.document import Document | |
def segments_to_documents(segments, source_path):
    """Build one LangChain ``Document`` per whisper segment.

    Args:
        segments: Iterable of segment records. Each record is indexed by
            the keys ``"text"``, ``"start"`` and ``"end"``.
        source_path: Value stored under the ``"source"`` metadata key of
            every produced document.

    Returns:
        A list of ``Document`` objects, in the same order as ``segments``.
        The segment text becomes ``page_content``; the start/end values
        and ``source_path`` are carried in ``metadata``.
    """
    documents = []
    for segment in segments:
        # Fresh metadata dict per document so entries are never shared.
        segment_metadata = {
            "start": segment["start"],
            "end": segment["end"],
            "source": source_path,
        }
        documents.append(
            Document(page_content=segment["text"], metadata=segment_metadata)
        )
    return documents