[docs][readme-badges][add pypi twitter github badges to readme][improvement][init-sort][sort imports alphabetically in init][improvement][version-bump][bump version to 0.3.0][feat][tokenizer-class][add MythosTokenizer to init exports][feat][test-tokenizer][add tokenizer test suite with printed output]
2026-05-02 17:43:27 +02:00 · 2026-04-19 23:18:05 -04:00 · 2026-04-19 23:18:05 -04:00 · 12f6c5b32e
commit 12f6c5b32e
parent 5ffb897dcf
6 changed files with 1249 additions and 20 deletions
--- a/README.md
+++ b/README.md
@@ -1,5 +1,32 @@
 # OpenMythos

+<p align="center">
+  <a href="https://pypi.org/project/open-mythos/" target="_blank">
+    <picture>
+      <source srcset="https://img.shields.io/pypi/v/open-mythos?style=for-the-badge&color=3670A0" media="(prefers-color-scheme: dark)">
+      <img alt="Version" src="https://img.shields.io/pypi/v/open-mythos?style=for-the-badge&color=3670A0">
+    </picture>
+  </a>
+  <a href="https://pypi.org/project/open-mythos/" target="_blank">
+    <picture>
+      <source srcset="https://img.shields.io/pypi/dm/open-mythos?style=for-the-badge&color=3670A0" media="(prefers-color-scheme: dark)">
+      <img alt="Downloads" src="https://img.shields.io/pypi/dm/open-mythos?style=for-the-badge&color=3670A0">
+    </picture>
+  </a>
+  <a href="https://twitter.com/kyegomezb/">
+    <picture>
+      <source srcset="https://img.shields.io/badge/Twitter-Follow-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white" media="(prefers-color-scheme: dark)">
+      <img src="https://img.shields.io/badge/Twitter-Follow-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white" alt="Twitter">
+    </picture>
+  </a>
+  <a href="https://github.com/kyegomez/OpenMythos" target="_blank">
+    <picture>
+      <source srcset="https://img.shields.io/badge/GitHub-Repo-171515?style=for-the-badge&logo=github&logoColor=white" media="(prefers-color-scheme: dark)">
+      <img src="https://img.shields.io/badge/GitHub-Repo-171515?style=for-the-badge&logo=github&logoColor=white" alt="GitHub">
+    </picture>
+  </a>
+</p>
+
 > **Disclaimer:** OpenMythos is an independent, community-driven theoretical reconstruction based solely on publicly available research and speculation. It is not affiliated with, endorsed by, or connected to Anthropic or any of their proprietary systems.

 OpenMythos is an open-source, theoretical implementation of the Claude Mythos model. It implements a Recurrent-Depth Transformer (RDT) with three stages: **Prelude** (transformer blocks), a looped **Recurrent Block** (up to `max_loop_iters`), and a final **Coda**. Attention is switchable between MLA and GQA, and the feed-forward uses a sparse MoE with routed and shared experts ideal for exploring compute-adaptive, depth-variable reasoning.
--- a/open_mythos/__init__.py
+++ b/open_mythos/__init__.py
@@ -1,29 +1,29 @@
 from open_mythos.main import (
-    MythosConfig,
-    RMSNorm,
-    GQAttention,
-    MLAttention,
-    Expert,
-    MoEFFN,
-    LoRAAdapter,
-    TransformerBlock,
-    LTIInjection,
     ACTHalting,
-    RecurrentBlock,
+    Expert,
+    GQAttention,
+    LoRAAdapter,
+    LTIInjection,
+    MLAttention,
+    MoEFFN,
+    MythosConfig,
     OpenMythos,
-    precompute_rope_freqs,
+    RecurrentBlock,
+    RMSNorm,
+    TransformerBlock,
     apply_rope,
     loop_index_embedding,
+    precompute_rope_freqs,
 )
 from open_mythos.tokenizer import MythosTokenizer
 from open_mythos.variants import (
     mythos_1b,
+    mythos_1t,
     mythos_3b,
     mythos_10b,
     mythos_50b,
     mythos_100b,
     mythos_500b,
-    mythos_1t,
 )

 __all__ = [
--- a/open_mythos/moda.py
+++ b/open_mythos/moda.py
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "open-mythos"
-version = "0.2.0"
+version = "0.3.0"
 description = "OpenMythos — open-source theoretical reconstruction of the Claude Mythos Recurrent-Depth Transformer architecture"
 license = "MIT"
 authors = ["Kye Gomez <kye@swarms.world>"]
--- a/test_main.py
+++ b/test_main.py
@@ -1,10 +1,3 @@
-"""
-pytest suite for OpenMythos (second.py).
-Tests every major class and feature: shapes, correctness invariants, and
-architecture-specific properties (LTI stability, ACT halting, depth extrapolation,
-KV cache consistency, GQA vs MLA swap).
-"""
-
 import torch
 import pytest
 from open_mythos.main import (
--- a/tests/test_tokenizer.py
+++ b/tests/test_tokenizer.py
@@ -0,0 +1,75 @@
+import pytest
+from open_mythos.tokenizer import MythosTokenizer
+
+
+@pytest.fixture(scope="module")
+def tokenizer():
+    tok = MythosTokenizer()
+    print(f"\nLoaded tokenizer: {tok.tokenizer.name_or_path}")
+    return tok
+
+
+def test_loads(tokenizer):
+    assert tokenizer is not None
+    print(f"Tokenizer: {tokenizer}")
+
+
+def test_vocab_size(tokenizer):
+    size = tokenizer.vocab_size
+    print(f"Vocab size: {size:,}")
+    assert size > 0
+
+
+def test_encode_returns_list_of_ints(tokenizer):
+    ids = tokenizer.encode("Hello, world!")
+    print(f"encode('Hello, world!') → {ids}")
+    assert isinstance(ids, list)
+    assert all(isinstance(i, int) for i in ids)
+    assert len(ids) > 0
+
+
+def test_encode_empty_string(tokenizer):
+    ids = tokenizer.encode("")
+    print(f"encode('') → {ids}")
+    assert isinstance(ids, list)
+
+
+def test_decode_returns_string(tokenizer):
+    ids = tokenizer.encode("Hello, world!")
+    text = tokenizer.decode(ids)
+    print(f"decode({ids}) → '{text}'")
+    assert isinstance(text, str)
+
+
+def test_roundtrip(tokenizer):
+    original = "The quick brown fox jumps over the lazy dog."
+    ids = tokenizer.encode(original)
+    recovered = tokenizer.decode(ids)
+    print(f"original:  '{original}'")
+    print(f"token ids: {ids}")
+    print(f"recovered: '{recovered}'")
+    assert original in recovered or recovered in original
+
+
+def test_encode_long_text(tokenizer):
+    text = "OpenMythos is a recurrent depth transformer. " * 100
+    ids = tokenizer.encode(text)
+    print(f"Long text ({len(text)} chars) → {len(ids)} tokens")
+    assert len(ids) > 100
+
+
+def test_custom_model_id():
+    tok = MythosTokenizer(model_id="openai/gpt-oss-20b")
+    print(f"Custom model_id vocab size: {tok.vocab_size:,}")
+    assert tok.vocab_size > 0
+
+
+def test_vocab_size_consistent(tokenizer):
+    outer = tokenizer.vocab_size
+    inner = tokenizer.tokenizer.vocab_size
+    print(f"vocab_size property: {outer:,}  |  inner tokenizer.vocab_size: {inner:,}")
+    assert outer == inner
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "--verbose", "-s"])