{
  "added_tokens_decoder": {},
  "additional_special_tokens": [
    "<|startoftext|>",
    "<|extra_0|>",
    "<|extra_4|>",
    "<|extra_5|>",
    "<|eos|>"
  ],
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_hy.HYTokenizer",
      null
    ]
  },
  "chat_template": "{% set context = {'has_head': true} %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 %}{% if content == '' %}{% set _ = context.update({'has_head': false}) %}{% elif message['role'] == 'system' %}{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}{% endif %}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 == 1 and not context.has_head %}{% set content = '<|startoftext|>' + content %}{% endif %}{% if loop.index0 == 1 and context.has_head %}{% set content = content + '<|extra_0|>' %}{% else %}{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}{% endif %}{% elif message['role'] == 'assistant' %}{% set content = content + '<|eos|>' %}{% endif %}{{ content }}{% endfor %}",
  "clean_up_tokenization_spaces": true,
  "model_max_length": 1048576,
  "model_type": "gpt2",
  "pad_token": "<|pad|>",
  "tokenizer_class": "HYTokenizer"
}