NVIDIA · mgrafu · May 27, 2026 · May 27, 2026
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -12,9 +12,10 @@ pipeline {
   environment {
     AR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-24-24-0'
     DE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-23-24-0'
-    EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-25-0'
+    EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/05-27-26-0'
     ES_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-24-0'
     ES_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-30-24-0'
+    HI_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/05-27-26-0'
     FR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-07-25-0'
     HU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/07-16-24-0'
     PT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/05-01-26-1'
@@ -27,8 +28,8 @@ pipeline {
     HY_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
-    KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0'
-    HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-23-26-0'
+    HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/05-27-26-0'
+    KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-03-25-0'
     DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {
@@ -144,21 +145,12 @@ pipeline {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=es_en --text="ciento uno " --cache_dir ${ES_EN_TN_CACHE}'
           }
         }
-      }
-    }
-
-    stage('L0: Create AR TN/ITN Grammars') {
-      when {
-        anyOf {
-          branch 'main' 
-          branch 'staging/**'
-          branch 'staging_*'
-          changeRequest target: 'main'
+        stage('L0: Codeswitched HI/EN ITN grammars') {
+          steps {
+            sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi_en --text="एक" --cache_dir ${HI_EN_TN_CACHE}'
+          }
         }
-      }
-      failFast true
-      parallel {
-        stage('L0: AR TN grammars') {
+        stage('L0: FR TN grammars') {
           steps {
             sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ar --text="2" --cache_dir ${AR_TN_CACHE}'
           }
@@ -409,6 +401,11 @@ pipeline {
             sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/es_en/ -m "not pleasefixme" --cpu --tn_cache_dir ${ES_EN_TN_CACHE}'
           }
         }
+        stage('L1: Run all Codeswitched HI/EN TN/ITN tests (restore grammars from cache)') {
+          steps {
+            sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hi_en/ -m "not pleasefixme" --cpu --tn_cache_dir ${HI_EN_TN_CACHE}'
+          }
+        }
         stage('L1: Run all AR TN/ITN tests (restore grammars from cache)') {
           steps {
             sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ar/ -m "not pleasefixme" --cpu --tn_cache_dir ${AR_TN_CACHE}'

diff --git a/nemo_text_processing/inverse_text_normalization/hi_en/__init__.py b/nemo_text_processing/inverse_text_normalization/hi_en/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from nemo_text_processing.inverse_text_normalization.hi_en.taggers.tokenize_and_classify import ClassifyFst
+from nemo_text_processing.inverse_text_normalization.hi_en.verbalizers.verbalize import VerbalizeFst
+from nemo_text_processing.inverse_text_normalization.hi_en.verbalizers.verbalize_final import VerbalizeFinalFst
diff --git a/nemo_text_processing/inverse_text_normalization/hi_en/graph_utils.py b/nemo_text_processing/inverse_text_normalization/hi_en/graph_utils.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nemo_text_processing/inverse_text_normalization/hi_en/taggers/__init__.py b/nemo_text_processing/inverse_text_normalization/hi_en/taggers/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nemo_text_processing/inverse_text_normalization/hi_en/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/hi_en/taggers/tokenize_and_classify.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.en.taggers.cardinal import CardinalFst as EnCardinalFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.date import DateFst as EnDateFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.decimal import DecimalFst as EnDecimalFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.electronic import ElectronicFst as EnElectronicFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.measure import MeasureFst as EnMeasureFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.money import MoneyFst as EnMoneyFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.ordinal import OrdinalFst as EnOrdinalFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.punctuation import PunctuationFst as EnPunctuationFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.telephone import TelephoneFst as EnTelephoneFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.time import TimeFst as EnTimeFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.whitelist import WhiteListFst as EnWhiteListFst
+from nemo_text_processing.inverse_text_normalization.en.taggers.word import WordFst as EnWordFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.date import DateFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.decimal import DecimalFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.fraction import FractionFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.measure import MeasureFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.money import MoneyFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.ordinal import OrdinalFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.punctuation import PunctuationFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.telephone import TelephoneFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.time import TimeFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.whitelist import WhiteListFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.word import WordFst
+from nemo_text_processing.text_normalization.en.graph_utils import (
+    INPUT_LOWER_CASED,
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+    generator_main,
+)
+from nemo_text_processing.utils.logging import logger
+
+
+class ClassifyFst(GraphFst):
+    """
+    Final class that composes all other classification grammars. This class can process an entire sentence, that is lower cased.
+    For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
+    More details to deployment at NeMo/tools/text_processing_deployment.
+
+    Args:
+        cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
+        overwrite_cache: set to True to overwrite .far files
+        whitelist: path to a file with Hindi whitelist replacements. If None, defaults to the Hindi whitelist at
+            nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist.tsv
+        en_whitelist: path to a file with English whitelist replacements. If None, defaults to the English whitelist at
+            nemo_text_processing/inverse_text_normalization/en/data/whitelist.tsv
+        input_case: accepting either "lower_cased" or "cased" input.
+    """
+
+    def __init__(
+        self,
+        cache_dir: str = None,
+        overwrite_cache: bool = False,
+        whitelist: str = None,
+        en_whitelist: str = None,
+        input_case: str = INPUT_LOWER_CASED,
+    ):
+        super().__init__(name="tokenize_and_classify", kind="classify")
+
+        far_file = None
+        if cache_dir is not None and cache_dir != "None":
+            os.makedirs(cache_dir, exist_ok=True)
+            far_file = os.path.join(cache_dir, f"hi_en_itn_{input_case}.far")
+        if not overwrite_cache and far_file and os.path.exists(far_file):
+            self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
+            logger.info(f"ClassifyFst.fst was restored from {far_file}.")
+        else:
+            logger.info(f"Creating ClassifyFst grammars.")
+
+            cardinal = CardinalFst()
+            cardinal_graph = cardinal.fst
+
+            ordinal = OrdinalFst(cardinal)
+            ordinal_graph = ordinal.fst
+
+            decimal = DecimalFst(cardinal)
+            decimal_graph = decimal.fst
+
+            fraction = FractionFst(cardinal)
+            fraction_graph = fraction.fst
+
+            measure_graph = MeasureFst(cardinal=cardinal, decimal=decimal).fst
+            date_graph = DateFst(cardinal, ordinal).fst
+            word_graph = WordFst().fst
+            time_graph = TimeFst(cardinal).fst
+            money_graph = MoneyFst(cardinal=cardinal, decimal=decimal).fst
+            whitelist_graph = WhiteListFst(input_file=whitelist).fst
+            punct_graph = PunctuationFst().fst
+            telephone_graph = TelephoneFst(cardinal).fst
+
+            en_cardinal = EnCardinalFst(input_case=input_case)
+            en_cardinal_graph = en_cardinal.fst
+
+            en_ordinal = EnOrdinalFst(cardinal=en_cardinal, input_case=input_case)
+            en_ordinal_graph = en_ordinal.fst
+
+            en_decimal = EnDecimalFst(cardinal=en_cardinal, input_case=input_case)
+            en_decimal_graph = en_decimal.fst
+
+            en_measure_graph = EnMeasureFst(cardinal=en_cardinal, decimal=en_decimal, input_case=input_case).fst
+            en_date_graph = EnDateFst(ordinal=en_ordinal, input_case=input_case).fst
+            en_word_graph = EnWordFst().fst
+            en_time_graph = EnTimeFst(input_case=input_case).fst
+            en_money_graph = EnMoneyFst(cardinal=en_cardinal, decimal=en_decimal, input_case=input_case).fst
+            en_whitelist_graph = EnWhiteListFst(input_file=en_whitelist, input_case=input_case).fst
+            en_punct_graph = EnPunctuationFst().fst
+            en_electronic_graph = EnElectronicFst(input_case=input_case).fst
+            en_telephone_graph = EnTelephoneFst(cardinal=en_cardinal, input_case=input_case).fst
+
+            classify = (
+                pynutil.add_weight(whitelist_graph, 1.01)
+                | pynutil.add_weight(en_whitelist_graph, 1.01)
+                | pynutil.add_weight(time_graph, 1.1)
+                | pynutil.add_weight(en_time_graph, 1.1)
+                | pynutil.add_weight(date_graph, 1.09)
+                | pynutil.add_weight(en_date_graph, 1.09)
+                | pynutil.add_weight(decimal_graph, 1.09)
+                | pynutil.add_weight(en_decimal_graph, 1.09)
+                | pynutil.add_weight(fraction_graph, 1.09)
+                | pynutil.add_weight(measure_graph, 1.6)
+                | pynutil.add_weight(en_measure_graph, 1.1)
+                | pynutil.add_weight(cardinal_graph, 1.6)
+                | pynutil.add_weight(en_cardinal_graph, 1.1)
+                | pynutil.add_weight(ordinal_graph, 1.6)
+                | pynutil.add_weight(en_ordinal_graph, 1.09)
+                | pynutil.add_weight(money_graph, 1.6)
+                | pynutil.add_weight(en_money_graph, 1.1)
+                | pynutil.add_weight(telephone_graph, 1.6)
+                | pynutil.add_weight(en_telephone_graph, 1.1)
+                | pynutil.add_weight(en_electronic_graph, 1.1)
+                | pynutil.add_weight(word_graph, 100)
+                | pynutil.add_weight(en_word_graph, 120)
+            )
+
+            punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=1.1) + pynutil.insert(" }")
+            en_punct = (
+                pynutil.insert("tokens { ") + pynutil.add_weight(en_punct_graph, weight=1.3) + pynutil.insert(" }")
+            )
+            token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }")
+            token_plus_punct = (
+                pynini.closure(punct + pynutil.insert(" "))
+                + token
+                + pynini.closure(pynutil.insert(" ") + punct | en_punct)
+            )
+
+            graph = token_plus_punct + pynini.closure(delete_extra_space + token_plus_punct)
+            graph = delete_space + graph + delete_space
+
+            self.fst = graph.optimize()
+
+            if far_file:
+                generator_main(far_file, {"tokenize_and_classify": self.fst})
+                logger.info(f"ClassifyFst grammars are saved to {far_file}.")
diff --git a/nemo_text_processing/inverse_text_normalization/hi_en/utils.py b/nemo_text_processing/inverse_text_normalization/hi_en/utils.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+
+def get_abs_path(rel_path):
+    """
+    Get absolute path
+
+    Args:
+        rel_path: relative path to this file
+
+    Returns absolute path
+    """
+    return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path
diff --git a/nemo_text_processing/inverse_text_normalization/hi_en/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/hi_en/verbalizers/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.