[{"@type":"PropertyValue","name":"Data content","value":"corpus for polyphone disambiguation."},{"@type":"PropertyValue","name":"Data size","value":"including 603 Mandarin character-pinyin pairs and 319,977 sentences"},{"@type":"PropertyValue","name":"Data source","value":"including news and colloquial sentences"},{"@type":"PropertyValue","name":"Annotation","value":"annotating the Mandarin pinyin pronunciation of specific polyphone contained in the sentence"},{"@type":"PropertyValue","name":"Language","value":"Chinese"},{"@type":"PropertyValue","name":"Application scenarios","value":"speech synthesis"},{"@type":"PropertyValue","name":"Accuracy","value":"at a Character Accuracy Rate of 99%"}]
{"id":1036,"datatype":"1","titleimg":"https://res.datatang.com/asset/productNew/APY190921001.png?Expires=2007353670&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=DM5syko13evu945QZYQABOd4NEo%3D","type1":"165","type1str":null,"type2":"219","type2str":null,"dataname":"319,977 Sentences - Mandarin Polyphone Dataset for Pinyin Disambiguation","datazy":[{"title":"Data content","desc":"Data content","content":"corpus for polyphone disambiguation."},{"title":"Data size","desc":"Data size","content":"including 603 Mandarin character-pinyin pairs and 319,977 sentences"},{"title":"Data source","desc":"Data source","content":"including news and colloquial sentences"},{"title":"Annotation","desc":"Annotation","content":"annotating the Mandarin pinyin pronunciation of specific polyphone contained in the sentence"},{"title":"Language","desc":"Language","content":"Chinese"},{"title":"Application scenarios","desc":"Application scenarios","content":"speech synthesis"},{"title":"Accuracy","desc":"Accuracy","content":"at a Character Accuracy Rate of 99%"}],"datatag":"Mandarin,Polyphone,TTS,Front-end Training Data Set","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[{"name":"/data/apps/damp/temp/ziptemp/APY190921001_demo1712743222701/APY190921001_demo/5C6AE796-AF30-46AC-ABCC-B698830A2626.png","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY190921001_demo1712743222701/APY190921001_demo/5C6AE796-AF30-46AC-ABCC-B698830A2626.png?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=3DU9vnyc7%2BB9S6WWDgNefaHBCqc%3D","intro":"","size":0,"progress":100,"type":"jpg"}],"officialSummary":"This dataset contains 319,977 Mandarin Chinese sentences, it is designed for polyphone disambiguation. 
It includes 603 common Mandarin pinyin pronunciations, There are differences in the number of phonetic corpora according to the number of phrases in a single word. It is ideal for Natural Language Processing (NLP) tasks, Text-to-Speech (TTS) systems, and linguistic research.","dataexampl":null,"datakeyword":["Mandarin polyphone corpus","Pinyin disambiguation dataset","Chinese polyphone dataset","Polyphonic character corpus","Pinyin pronunciation dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Language,Voice Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechSyn","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
https://www.nexdata.ai/shujutang/static/image/index/datatang_yuyin_default.webp
[{"@type":"ImageObject","embedUrl":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY190921001_demo1712743222701/APY190921001_demo/5C6AE796-AF30-46AC-ABCC-B698830A2626.png?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=3DU9vnyc7%2BB9S6WWDgNefaHBCqc%3D"}]
319,977 Sentences - Mandarin Polyphone Dataset for Pinyin Disambiguation
Mandarin polyphone corpus
Pinyin disambiguation dataset
Chinese polyphone dataset
Polyphonic character corpus
Pinyin pronunciation dataset
This dataset contains 319,977 Mandarin Chinese sentences and is designed for polyphone disambiguation. It includes 603 common Mandarin pinyin pronunciations; the number of sentences for each one varies according to the number of phrases in which the character occurs. It is ideal for Natural Language Processing (NLP) tasks, Text-to-Speech (TTS) systems, and linguistic research.
This is a paid dataset for commercial use, research purposes, and more. Licensed ready-made datasets help jump-start AI projects.
![Specifications]()
Specifications
Data content
corpus for polyphone disambiguation.
Data size
including 603 Mandarin character-pinyin pairs and 319,977 sentences
Data source
including news and colloquial sentences
Annotation
annotating the Mandarin pinyin pronunciation of specific polyphone contained in the sentence
Application scenarios
speech synthesis
Accuracy
at a Character Accuracy Rate of 99%
![Sample]()
Sample
![Recommended Datasets]()
Recommended Datasets
Tell Us Your Special Needs
39ba7069-8626-4a3f-aeed-7dc3c883b94b