{"id":14,"title":"IndicConformer","area":"ASR","published_on":"2024-09-07","conference":null,"description":"AI4Bharat's IndicConformer is a suite of ASR models built to deliver accurate speech-to-text conversion in all 22 official Indian languages. By leveraging cutting-edge deep learning techniques, these models provide precise transcriptions. As the country's first open-source ASR system covering such a vast array of languages, AI4Bharat IndicConformer is a transformative tool for making technology more inclusive and accessible to all. IndicConformer is released under the MIT license.","paper_link":null,"colab_link":"https://colab.research.google.com/drive/1ZQJEhYgLKS72_V4LvNmsyU2zF9pICRvE","website_link":"https://ai4bharat.github.io/ai4b-website/areas/model/ASR/IndicConformer","github_link":"https://github.com/AI4Bharat/IndicConformerASR","service_id":"ai4bharat/conformer-multilingual-all--gpu-t4","hf_link":"https://huggingface.co/collections/ai4bharat/indicconformer-66d9e933a243cba4b679cb7f","installation_steps_json":[{"instruction":"Setting up conda","codeString":null,"type":"heading"},{"instruction":"Creating and activating conda environment","codeString":"conda create -n temo python=3.10\nconda activate temo","type":"instruction"},{"instruction":"Installing libraries","codeString":"pip3 install torch torchvision torchaudio\npip install packaging\npip install huggingface_hub==0.23.2","type":"instruction"},{"instruction":"Cloning repository","codeString":"git clone https://github.com/AI4Bharat/NeMo.git\ncd NeMo\nbash reinstall.sh","type":"instruction"}],"usage_steps_json":[{"instruction":"Inference","codeString":null,"type":"heading"},{"instruction":"Download the model checkpoints from the GitHub repository.","codeString":"https://github.com/AI4Bharat/IndicConformerASR","type":"instruction"},{"instruction":"Loading the checkpoint","codeString":"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\nmodel = nemo_asr.models.EncDecCTCModel.restore_from(restore_path='<CHECKPOINT_PATH>.nemo')\nmodel.freeze()\nmodel = model.to(device)","type":"instruction"},{"instruction":"CTC Decoding","codeString":"model.cur_decoder = 'ctc'\nctc_text = model.transcribe(['/path/audio_path.wav'], batch_size=1, logprobs=False, language_id='LANG_ID')[0]","type":"instruction"},{"instruction":"RNN-T Decoding","codeString":"model.cur_decoder = 'rnnt'\nrnnt_text = model.transcribe(['/path/audio_path.wav'], batch_size=1, language_id='LANG_ID')[0]","type":"instruction"}],"testimonials_json":null,"latest":true,"paper_award":null,"license":[],"type":"Model","hfData":{"downloads":144863},"services":{"ai4bharat/conformer-multilingual-all--gpu-t4":{"service_id":"ai4bharat/conformer-multilingual-all--gpu-t4","languageFilters":{"sourceLanguages":["ks","ne","kok","mni","sd","bn","sat","ml","mr","kn","ta","sa","as","or","hi","brx","te","gu","ur","pa","doi","mai"],"targetLanguages":[]}}}}