HTTP 200 OK
Allow: GET, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept
{
"id": 14,
"title": "IndicConformer",
"area": "ASR",
"published_on": "2024-09-07",
"conference": null,
"description": "AI4Bharat's IndicConformers is a suite of ASR models built to deliver accurate speech-to-text conversion in all 22 official Indian languages. By leveraging cutting-edge deep learning techniques, these models provide precise transcriptions. As the country's first open-source ASR system covering such a vast array of languages, AI4Bharat Indic Conformer is a transformative tool for making technology more inclusive and accessible to all. IndicConformer is released under the MIT license.",
"paper_link": null,
"colab_link": "https://colab.research.google.com/drive/1ZQJEhYgLKS72_V4LvNmsyU2zF9pICRvE",
"website_link": "https://ai4bharat.github.io/ai4b-website/areas/model/ASR/IndicConformer",
"github_link": "https://github.com/AI4Bharat/IndicConformerASR",
"service_id": "ai4bharat/conformer-multilingual-all--gpu-t4",
"hf_link": "https://huggingface.co/collections/ai4bharat/indicconformer-66d9e933a243cba4b679cb7f",
"installation_steps_json": [
{
"instruction": "Setting up conda",
"codeString": null,
"type": "heading"
},
{
"instruction": "Creating and activating conda environment",
"codeString": "conda create -n temo python=3.10\nconda activate temo",
"type": "instruction"
},
{
"instruction": "Installing libraries",
"codeString": "pip3 install torch torchvision torchaudio\npip install packaging\npip install huggingface_hub==0.23.2",
"type": "instruction"
},
{
"instruction": "Cloning repository",
"codeString": "git clone https://github.com/AI4Bharat/NeMo.git\ncd NeMo\nbash reinstall.sh",
"type": "instruction"
}
],
"usage_steps_json": [
{
"instruction": "Inference",
"codeString": null,
"type": "heading"
},
{
"instruction": "Download the model checkpoints from the GitHub repository.",
"codeString": "https://github.com/AI4Bharat/IndicConformerASR",
"type": "instruction"
},
{
"instruction": "Loading the checkpoint",
"codeString": "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\nmodel = nemo_asr.models.EncDecCTCModel.restore_from(restore_path='<CHECKPOINT_PATH>.nemo')\nmodel.freeze()\nmodel = model.to(device)",
"type": "instruction"
},
{
"instruction": "CTC Decoding",
"codeString": "model.cur_decoder = 'ctc'\nctc_text = model.transcribe(['/path/audio_path.wav'], batch_size=1,logprobs=False, language_id='LANG_ID')[0]",
"type": "instruction"
},
{
"instruction": "RNN-T Decoding",
"codeString": "model.cur_decoder = 'rnnt'\nctc_text = model.transcribe(['/path/audio_path.wav'], batch_size=1, language_id='LANG_ID')[0]",
"type": "instruction"
}
],
"testimonials_json": null,
"latest": true,
"paper_award": null,
"license": [],
"type": "Model",
"hfData": {
"downloads": 144863
},
"services": {
"ai4bharat/conformer-multilingual-all--gpu-t4": {
"service_id": "ai4bharat/conformer-multilingual-all--gpu-t4",
"languageFilters": {
"sourceLanguages": [
"ks",
"ne",
"kok",
"mni",
"sd",
"bn",
"sat",
"ml",
"mr",
"kn",
"ta",
"sa",
"as",
"or",
"hi",
"brx",
"te",
"gu",
"ur",
"pa",
"doi",
"mai"
],
"targetLanguages": []
}
}
}
}