mirror of
https://github.com/kyegomez/OpenMythos.git
synced 2026-05-02 17:43:27 +02:00
[docs][readme-badges][add pypi twitter github badges to
readme][improvement][init-sort][sort imports alphabetically in init][improvement][version-bump][bump version to 0.3.0][feat][tokenizer-class][add MythosTokenizer to init exports][feat][test-tokenizer][add tokenizer test suite with printed output]
This commit is contained in:
parent
5ffb897dcf
commit
12f6c5b32e
27
README.md
27
README.md
@ -1,5 +1,32 @@
|
||||
# OpenMythos
|
||||
|
||||
<p align="center">
|
||||
<a href="https://pypi.org/project/open-mythos/" target="_blank">
|
||||
<picture>
|
||||
<source srcset="https://img.shields.io/pypi/v/open-mythos?style=for-the-badge&color=3670A0" media="(prefers-color-scheme: dark)">
|
||||
<img alt="Version" src="https://img.shields.io/pypi/v/open-mythos?style=for-the-badge&color=3670A0">
|
||||
</picture>
|
||||
</a>
|
||||
<a href="https://pypi.org/project/open-mythos/" target="_blank">
|
||||
<picture>
|
||||
<source srcset="https://img.shields.io/pypi/dm/open-mythos?style=for-the-badge&color=3670A0" media="(prefers-color-scheme: dark)">
|
||||
<img alt="Downloads" src="https://img.shields.io/pypi/dm/open-mythos?style=for-the-badge&color=3670A0">
|
||||
</picture>
|
||||
</a>
|
||||
<a href="https://twitter.com/kyegomezb/">
|
||||
<picture>
|
||||
<source srcset="https://img.shields.io/badge/Twitter-Follow-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white" media="(prefers-color-scheme: dark)">
|
||||
<img src="https://img.shields.io/badge/Twitter-Follow-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white" alt="Twitter">
|
||||
</picture>
|
||||
</a>
|
||||
<a href="https://github.com/kyegomez/OpenMythos" target="_blank">
|
||||
<picture>
|
||||
<source srcset="https://img.shields.io/badge/GitHub-Repo-171515?style=for-the-badge&logo=github&logoColor=white" media="(prefers-color-scheme: dark)">
|
||||
<img src="https://img.shields.io/badge/GitHub-Repo-171515?style=for-the-badge&logo=github&logoColor=white" alt="GitHub">
|
||||
</picture>
|
||||
</a>
|
||||
</p>
|
||||
|
||||
> **Disclaimer:** OpenMythos is an independent, community-driven theoretical reconstruction based solely on publicly available research and speculation. It is not affiliated with, endorsed by, or connected to Anthropic or any of their proprietary systems.
|
||||
|
||||
OpenMythos is an open-source, theoretical implementation of the Claude Mythos model. It implements a Recurrent-Depth Transformer (RDT) with three stages: **Prelude** (transformer blocks), a looped **Recurrent Block** (up to `max_loop_iters`), and a final **Coda**. Attention is switchable between MLA and GQA, and the feed-forward uses a sparse MoE with routed and shared experts, making it ideal for exploring compute-adaptive, depth-variable reasoning.
|
||||
|
||||
@ -1,29 +1,29 @@
|
||||
from open_mythos.main import (
|
||||
MythosConfig,
|
||||
RMSNorm,
|
||||
GQAttention,
|
||||
MLAttention,
|
||||
Expert,
|
||||
MoEFFN,
|
||||
LoRAAdapter,
|
||||
TransformerBlock,
|
||||
LTIInjection,
|
||||
ACTHalting,
|
||||
RecurrentBlock,
|
||||
Expert,
|
||||
GQAttention,
|
||||
LoRAAdapter,
|
||||
LTIInjection,
|
||||
MLAttention,
|
||||
MoEFFN,
|
||||
MythosConfig,
|
||||
OpenMythos,
|
||||
precompute_rope_freqs,
|
||||
RecurrentBlock,
|
||||
RMSNorm,
|
||||
TransformerBlock,
|
||||
apply_rope,
|
||||
loop_index_embedding,
|
||||
precompute_rope_freqs,
|
||||
)
|
||||
from open_mythos.tokenizer import MythosTokenizer
|
||||
from open_mythos.variants import (
|
||||
mythos_1b,
|
||||
mythos_1t,
|
||||
mythos_3b,
|
||||
mythos_10b,
|
||||
mythos_50b,
|
||||
mythos_100b,
|
||||
mythos_500b,
|
||||
mythos_1t,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
|
||||
1134
open_mythos/moda.py
Normal file
1134
open_mythos/moda.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "open-mythos"
|
||||
version = "0.2.0"
|
||||
version = "0.3.0"
|
||||
description = "OpenMythos — open-source theoretical reconstruction of the Claude Mythos Recurrent-Depth Transformer architecture"
|
||||
license = "MIT"
|
||||
authors = ["Kye Gomez <kye@swarms.world>"]
|
||||
|
||||
@ -1,10 +1,3 @@
|
||||
"""
|
||||
pytest suite for OpenMythos (second.py).
|
||||
Tests every major class and feature: shapes, correctness invariants, and
|
||||
architecture-specific properties (LTI stability, ACT halting, depth extrapolation,
|
||||
KV cache consistency, GQA vs MLA swap).
|
||||
"""
|
||||
|
||||
import torch
|
||||
import pytest
|
||||
from open_mythos.main import (
|
||||
|
||||
75
tests/test_tokenizer.py
Normal file
75
tests/test_tokenizer.py
Normal file
@ -0,0 +1,75 @@
|
||||
import pytest
|
||||
from open_mythos.tokenizer import MythosTokenizer
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def tokenizer():
    """Build one default ``MythosTokenizer`` and reuse it for every test in this module."""
    instance = MythosTokenizer()
    # Announce which underlying tokenizer was loaded (visible with ``-s``).
    print(f"\nLoaded tokenizer: {instance.tokenizer.name_or_path}")
    return instance
|
||||
|
||||
|
||||
def test_loads(tokenizer):
    """The shared fixture must yield an actual object rather than ``None``."""
    was_built = tokenizer is not None
    assert was_built
    print(f"Tokenizer: {tokenizer}")
|
||||
|
||||
|
||||
def test_vocab_size(tokenizer):
    """The wrapper's ``vocab_size`` must be strictly positive."""
    n_tokens = tokenizer.vocab_size
    print(f"Vocab size: {n_tokens:,}")
    assert n_tokens > 0
|
||||
|
||||
|
||||
def test_encode_returns_list_of_ints(tokenizer):
    """``encode`` on a short sentence must give a non-empty ``list`` whose items are all ``int``."""
    token_ids = tokenizer.encode("Hello, world!")
    print(f"encode('Hello, world!') → {token_ids}")

    assert isinstance(token_ids, list)
    # Collect offenders instead of using all(); the list must come out empty.
    non_ints = [tid for tid in token_ids if not isinstance(tid, int)]
    assert not non_ints
    assert len(token_ids) > 0
|
||||
|
||||
|
||||
def test_encode_empty_string(tokenizer):
    """Encoding ``""`` must still return a ``list``; its contents are not checked."""
    empty_ids = tokenizer.encode("")
    print(f"encode('') → {empty_ids}")
    returned_list = isinstance(empty_ids, list)
    assert returned_list
|
||||
|
||||
|
||||
def test_decode_returns_string(tokenizer):
    """``decode`` applied to freshly encoded ids must produce a ``str``."""
    token_ids = tokenizer.encode("Hello, world!")
    decoded = tokenizer.decode(token_ids)
    print(f"decode({token_ids}) → '{decoded}'")
    assert isinstance(decoded, str)
|
||||
|
||||
|
||||
def test_roundtrip(tokenizer):
    """Encode a sentence, decode it again, and check the text survives.

    The comparison is by containment in either direction rather than strict
    equality, so the decoded text may be a superset or a subset of the input.
    """
    original = "The quick brown fox jumps over the lazy dog."
    ids = tokenizer.encode(original)
    recovered = tokenizer.decode(ids)
    print(f"original:  '{original}'")
    print(f"token ids: {ids}")
    print(f"recovered: '{recovered}'")
    # "" is a substring of every string, so without this guard a decode() that
    # returned nothing would satisfy the containment check below.
    assert recovered.strip(), "decode() returned empty text"
    assert original in recovered or recovered in original
|
||||
|
||||
|
||||
def test_encode_long_text(tokenizer):
    """A sentence repeated 100 times must encode to more than 100 token ids."""
    sentence = "OpenMythos is a recurrent depth transformer. "
    long_text = sentence * 100
    token_ids = tokenizer.encode(long_text)
    print(f"Long text ({len(long_text)} chars) → {len(token_ids)} tokens")
    assert len(token_ids) > 100
|
||||
|
||||
|
||||
def test_custom_model_id():
    """Constructing with an explicit ``model_id`` must give a positive ``vocab_size``."""
    custom = MythosTokenizer(model_id="openai/gpt-oss-20b")
    print(f"Custom model_id vocab size: {custom.vocab_size:,}")
    assert custom.vocab_size > 0
|
||||
|
||||
|
||||
def test_vocab_size_consistent(tokenizer):
    """The wrapper's ``vocab_size`` must equal the wrapped tokenizer's ``vocab_size``."""
    wrapper_size = tokenizer.vocab_size
    wrapped_size = tokenizer.tokenizer.vocab_size
    print(f"vocab_size property: {wrapper_size:,} | inner tokenizer.vocab_size: {wrapped_size:,}")
    assert wrapper_size == wrapped_size
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # pytest.main() returns the exit code rather than exiting the process.
    # Raise it so that running this file directly reports failures through a
    # non-zero exit status instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "--verbose", "-s"]))
|
||||
Loading…
x
Reference in New Issue
Block a user