[{"@type":"PropertyValue","name":"Data size","value":"3,506 OCR images, including 2,056 images of natural scenes, 1,103 Internet images, 347 document images"},{"@type":"PropertyValue","name":"Collecting environment","value":"including natural scenes (plaque, packaging instructions, small advertisements, menus, posters, etc.), Internet images (magazine covers, comic covers, etc.), document images (text documents, etc.)"},{"@type":"PropertyValue","name":"Data diversity","value":"including multiple scenes, multiple angles, different light conditions"},{"@type":"PropertyValue","name":"Device","value":"cellphone"},{"@type":"PropertyValue","name":"Shooting angles","value":"looking up angle, eye-level angle"},{"@type":"PropertyValue","name":"Format","value":"the image data format is .jpg, the annotated file format is .json"},{"@type":"PropertyValue","name":"Annotation content","value":"line-level quadrilateral bounding box annotation and transcription for the texts; column-level quadrilateral bounding box annotation and transcription for the texts"},{"@type":"PropertyValue","name":"Accuracy","value":"The error bound of each vertex of quadrilateral bounding box is within 10 pixels, which is a qualified annotation, the accuracy of bounding boxes is not less than 97%; The texts transcription accuracy is not less than 97%."}]
{"id":1058,"datatype":"1","titleimg":"https://res.datatang.com/asset/productNew/APY200102001.png?Expires=2007353677&OSSAccessKeyId=LTAI5tQwXnJZbubgVfVa1ep9&Signature=H%2BObkF8Oa2YamZD1YGgr6LR/a3g%3D","type1":"147","type1str":null,"type2":"147","type2str":null,"dataname":"3,506 Hindi OCR Images Data - Images with Annotation and Transcription","datazy":[{"title":"Data size","value":"3,506 OCR images, including 2,056 images of natural scenes, 1,103 Internet images, 347 document images"},{"title":"Collecting environment","value":"including natural scenes (plaque, packaging instructions, small advertisements, menus, posters, etc.), Internet images (magazine covers, comic covers, etc.), document images (text documents, etc.)"},{"title":"Data diversity","value":"including multiple scenes, multiple angles, different light conditions"},{"title":"Device","value":"cellphone"},{"title":"Shooting angles","value":"looking up angle, eye-level angle"},{"title":"Format","value":"the image data format is .jpg, the annotated file format is .json"},{"title":"Annotation content","value":"line-level quadrilateral bounding box annotation and transcription for the texts; column-level quadrilateral bounding box annotation and transcription for the texts"},{"title":"Accuracy","value":"The error bound of each vertex of quadrilateral bounding box is within 10 pixels, which is a qualified annotation, the accuracy of bounding boxes is not less than 97%; The texts transcription accuracy is not less than 97%."}],"datatag":"Hindi OCR,Multiple scenes,Multiple angles,Different light 
conditions","technologydoc":null,"downurl":null,"datainfo":"","standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[["jpg","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00004.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=1RHyBQ877xadK9qW66Lj6Ivikq0%3D","/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00004.jpg",""],["jpg","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00006.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=ZYCVjKlMLMjdYWQ17c9BpniCAa0%3D","/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00006.jpg",""],["jpg","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00027.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=p8LQVuz%2BoHuw%2BP8mwBwq45M3q9Q%3D","/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00027.jpg",""]],"officialSummary":"3,506 Hindi OCR Images Data - Images with Annotation and Transcription. The data includes 2,056 images of natural scenes, 1,103 Internet images and 347 document images. For line-level content annotation, line-level quadrilateral bounding box annotation and test transcription was adpoted; for column-level content annotation, column-level quadrilateral bounding box annotation and text transcription was adpoted. 
The data can be used for tasks such as Hindi character recognition in multiple scenes.","dataexampl":"","datakeyword":["Hindi"," OCR"," document images"," Internet images"," natural scenes"," multiple angles"," different light conditions"," quadrilateral bounding box annotation"," line-level transcription for the texts"," column-level transcription for the texts"],"isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":"Data Type,Language","tagTypeZh":null,"website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"ocr","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.webp","/shujutang/static/image/comm/audio_bg2.webp","/shujutang/static/image/comm/audio_bg3.webp","/shujutang/static/image/comm/audio_bg4.webp","/shujutang/static/image/comm/audio_bg5.webp"],"single":"no","firstList":[["jpg","https://bj-oss-datatang-03.oss-cn-beijing.aliyuncs.com/filesInfoUpload/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00001.jpg?Expires=4102329599&OSSAccessKeyId=LTAI8NWs2pDolLNH&Signature=VNNczsPK38IDN%2F2BxuBFYMdpzBc%3D","/data/apps/damp/temp/ziptemp/APY200102001_demo1695808983232/APY200102001_demo/00001.jpg",""]]}
3,506 Hindi OCR Images Data - Images with Annotation and Transcription
Hindi
OCR
document images
Internet images
natural scenes
multiple angles
different light conditions
quadrilateral bounding box annotation
line-level transcription for the texts
column-level transcription for the texts
3,506 Hindi OCR Images Data - Images with Annotation and Transcription. The data includes 2,056 images of natural scenes, 1,103 Internet images and 347 document images. For line-level content annotation, line-level quadrilateral bounding box annotation and text transcription were adopted; for column-level content annotation, column-level quadrilateral bounding box annotation and text transcription were adopted. The data can be used for tasks such as Hindi character recognition in multiple scenes.
This is a paid dataset for commercial use, research purposes, and more. Licensed ready-made datasets help jump-start AI projects.
Specifications
Data size
3,506 OCR images, including 2,056 images of natural scenes, 1,103 Internet images, 347 document images
Collecting environment
including natural scenes (plaque, packaging instructions, small advertisements, menus, posters, etc.), Internet images (magazine covers, comic covers, etc.), document images (text documents, etc.)
Data diversity
including multiple scenes, multiple angles, different light conditions
Device
cellphone
Shooting angles
looking up angle, eye-level angle
Format
the image data format is .jpg, the annotated file format is .json
Annotation content
line-level quadrilateral bounding box annotation and transcription for the texts; column-level quadrilateral bounding box annotation and transcription for the texts
Accuracy
An annotation is qualified when the error of each vertex of the quadrilateral bounding box is within 10 pixels; the accuracy of bounding boxes is not less than 97%. The text transcription accuracy is not less than 97%.