[{"@type":"PropertyValue","name":"Format","value":"16kHz, 16bit, uncompressed wav, mono channel;"},{"@type":"PropertyValue","name":"Recording condition","value":"Low background noise;"},{"@type":"PropertyValue","name":"Content category","value":"generic domain(without given topics);"},{"@type":"PropertyValue","name":"Recording device","value":"Android Smartphone;"},{"@type":"PropertyValue","name":"Speaker","value":"700 people, 35%male and 65% femal;"},{"@type":"PropertyValue","name":"Country","value":"China(CHN);"},{"@type":"PropertyValue","name":"Language(Region) Code","value":"zh-CN;"},{"@type":"PropertyValue","name":"Language","value":"Mandarin Chinese;"},{"@type":"PropertyValue","name":"Features of annotation","value":"Transcription text; 4 noise symbols; mainly annotates for near-end speech"},{"@type":"PropertyValue","name":"Accuracy Rate","value":"Sentence Accuracy Rate (SAR) 95%"}]
{"id":77,"datatype":"1","titleimg":"[{\"name\":\"APY161101040.png\",\"url\":\"https://res.datatang.com/asset/productNew/APY161101040.png?Expires=2007353628&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=nRsuRhyqqn7s%2BF3hpWWqaHu00fM%3D\",\"size\":2048,\"progress\":100}]","type1":"165","type1str":null,"type2":"166","type2str":null,"dataname":"1,420 Hours - Mandarin Chinese(China) Spontaneous Monologue Smartphone speech dataset","datazy":[{"title":"Format","desc":"Format","content":"16kHz, 16bit, uncompressed wav, mono channel;"},{"title":"Recording condition","desc":"Recording condition","content":"Low background noise;"},{"title":"Content category","desc":"Content category","content":"generic domain(without given topics);"},{"title":"Recording device","desc":"Recording device","content":"Android Smartphone;"},{"title":"Speaker","desc":"Speaker","content":"700 people, 35%male and 65% femal;"},{"title":"Country","desc":"Country","content":"China(CHN);"},{"title":"Language(Region) Code","desc":"Language(Region) Code","content":"zh-CN;"},{"title":"Language","desc":"Language","content":"Mandarin Chinese;"},{"title":"Features of annotation","desc":"Features of annotation","content":"Transcription text; 4 noise symbols; mainly annotates for near-end speech"},{"title":"Accuracy Rate","desc":"Accuracy Rate","content":"Sentence Accuracy Rate (SAR) 95%"}],"datatag":"Chinese,Mandarin,Spontaneous,Smartphone,On the 
line,Monologue","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":null,"samplePresentation":[{"name":"/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0019.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0019.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=na8sOxNWOL0WLhvVBHKVdmAP64o%3D","intro":"你觉得我说话语速快吗","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0008.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0008.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=Alfrz0BNk85P1ylyFWk0xHu%2BlKw%3D","intro":"看看到时间了然后我给你发过去","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0011.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0011.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=hQfAAJnmqUENqI%2Fqean4Mq7m0hY%3D","intro":"你这几天你[P]你都几点睡觉呀","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0009.wav","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0009.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=pYzB27cvij1yE9X6Z86BzNfTm7w%3D","intro":"嗯可以了是吧","size":0,"progress":100,"type":"mp3"},{"name":"/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0026.wav","url":"https://bj-oss-datata
ng-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/apy1611010401695808866398/apy161101040/S0026.wav?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=qyG5G0HbKe%2BJDwfJlgIxNP3CD78%3D","intro":"然后那个得准备好检查的东西","size":0,"progress":100,"type":"mp3"}],"officialSummary":"Mandarin Chinese(China) Spontaneous Monologue Smartphone speech dataset, collected from dialogues without given topics, close to casual conversation, covering generic domain. Transcribed with text content, noise and other attributes. Our dataset was collected from extensive and diversify speakers(700 Chinese in total), geographicly speaking, enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes, our datasets are all GDPR, CCPA, PIPL complied.","dataexampl":null,"datakeyword":["Mandarin asr data"," Mandarin asr dataset"," Mandarin asr collection"," Mandarin language data"," Mandarin language dataset"," Mandarin language collection"," Mandarin speech data"," Mandarin speech dataset"," Mandarin speech collection"," Mandarin discuss asr data"," Mandarin discuss asr dataset"," Mandarin discuss asr collection"," Mandarin discuss language data"," Mandarin discuss language dataset"," Mandarin discuss language collection"," Mandarin discuss speech data"," Mandarin discuss speech dataset"," Mandarin discuss speech collection"," Mandarin small talk asr data"," Mandarin small talk asr dataset"," Mandarin small talk asr collection"," Mandarin small talk language collection"," Mandarin small talk speech data"," Mandarin small talk speech dataset"," Mandarin conversational asr data"," Mandarin conversational asr dataset"," Mandarin conversational asr collection"," Mandarin conversational speech data"," Mandarin conversational speech dataset"," Mandarin 
chat asr data"," Mandarin chat asr dataset"," Mandarin chat asr collection"," Mandarin chat language dataset"," Mandarin chat language collection"," Mandarin chat speech data"," Mandarin chat speech dataset"," Mandarin chat speech collection"," Mandarin speech asr data"," Mandarin speech asr collection"," Mandarin speech language data"," Mandarin speech language dataset"," Mandarin speech language collection"," Mandarin talk asr data"," Mandarin talk asr dataset"," Mandarin conversation asr dataset"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Language,Data Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechRec","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
Mandarin Chinese (China) Spontaneous Monologue Smartphone speech dataset, collected from dialogues without given topics, close to casual conversation, covering the generic domain. Transcribed with text content, noise, and other attributes. Our dataset was collected from an extensive and geographically diverse pool of speakers (700 Chinese speakers in total), enhancing model performance in real and complex tasks. Quality tested by various AI companies. We strictly adhere to data protection regulations and privacy standards, ensuring the maintenance of user privacy and legal rights throughout the data collection, storage, and usage processes. Our datasets are all GDPR, CCPA, and PIPL compliant.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.
Specifications
Format
16kHz, 16bit, uncompressed wav, mono channel;
Recording condition
Low background noise;
Content category
generic domain(without given topics);
Recording device
Android Smartphone;
Speaker
700 people, 35% male and 65% female;
Country
China(CHN);
Language(Region) Code
zh-CN;
Language
Mandarin Chinese;
Features of annotation
Transcription text; 4 noise symbols; mainly annotated for near-end speech