[{"@type":"PropertyValue","name":"Data content","value":"200,475 sentences of text were transcribed in Chinese characters;"},{"@type":"PropertyValue","name":"Data scale","value":"200,475 original texts with 457,832 annotations;"},{"@type":"PropertyValue","name":"Content source","value":"Sentences extracted from various types of news, articles, novels, etc."},{"@type":"PropertyValue","name":"Language","value":"Chinese;"},{"@type":"PropertyValue","name":"Annotation","value":"Annotate the special symbols and Arabic numerals in the sentences as Chinese characters;"},{"@type":"PropertyValue","name":"Applications","value":"TTS, Text normalization;"}]
{"id":1102,"datatype":"1","titleimg":"https://res.datatang.com/asset/productNew/APY210430001.png?Expires=2007353690&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=iYLubaJsdu%2BkGUK9Vx9rTfWGG6g%3D","type1":"165","type1str":null,"type2":"219","type2str":null,"dataname":"200,475 Sentences - Chinese Text Normalization Dataset for TTS & NLP","datazy":[{"title":"Data content","content":"200,475 sentences of text were transcribed in Chinese characters;","desc":"Data content"},{"title":"Data scale","content":"200,475 original texts with 457,832 annotations;","desc":"Data scale"},{"title":"Content source","content":"Sentences extracted from various types of news, articles, novels, etc.","desc":"Content source"},{"title":"Language","content":"Chinese;","desc":"Language"},{"title":"Annotation","content":"Annotate the special symbols and Arabic numerals in the sentences as Chinese characters;","desc":"Annotation"},{"title":"Applications","content":"TTS, Text normalization;","desc":"Applications"}],"datatag":"TN,TTS,Text Normalization","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[{"name":"/data/apps/damp/temp/ziptemp/APY210430001_demo1711360879318/APY210430001_demo/20210927171813646_demo.jpg","url":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY210430001_demo1711360879318/APY210430001_demo/20210927171813646_demo.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=95%2FPPMl0M9RjZnQHrGFRikiiJ5k%3D","intro":"","size":0,"progress":100,"type":"jpg"}],"officialSummary":"This dataset comprises 200,475 Mandarin Chinese sentences annotated for text normalization, transforming special symbols and Arabic numerals into Chinese characters. 
It is ideal for training and evaluating Text-to-Speech (TTS) systems and Natural Language Processing (NLP) models.","dataexampl":null,"datakeyword":["Chinese text normalization dataset","Mandarin TTS corpus","Text normalization for speech synthesis","Symbol-to-character annotation dataset","Mandarin text preprocessing data"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Language,Voice Type","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"speechSyn","BGimg":"brightSpot_audio","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"]}
https://www.nexdata.ai/shujutang/static/image/index/datatang_yuyin_default.webp
[{"@type":"ImageObject","embedUrl":"https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY210430001_demo1711360879318/APY210430001_demo/20210927171813646_demo.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=95%2FPPMl0M9RjZnQHrGFRikiiJ5k%3D"}]
200,475 Sentences - Chinese Text Normalization Dataset for TTS & NLP
Chinese text normalization dataset
Mandarin TTS corpus
Text normalization for speech synthesis
Symbol-to-character annotation dataset
Mandarin text preprocessing data
This dataset comprises 200,475 Mandarin Chinese sentences annotated for text normalization, transforming special symbols and Arabic numerals into Chinese characters. It is ideal for training and evaluating Text-to-Speech (TTS) systems and Natural Language Processing (NLP) models.
This is a paid dataset for commercial use, research purposes, and more. Licensed, ready-made datasets help jump-start AI projects.
![Specifications]()
Specifications
Data content
200,475 sentences of text were transcribed in Chinese characters;
Data scale
200,475 original texts with 457,832 annotations;
Content source
Sentences extracted from various types of news, articles, novels, etc.
Annotation
Annotate the special symbols and Arabic numerals in the sentences as Chinese characters;
Applications
TTS, Text normalization;
![Sample]()
Sample
![Recommended Datasets]()
Recommended Datasets
Tell Us Your Special Needs
2aacdddb-2085-4d4b-81b1-eb45d3a65ae1