{"id":124619,"url":"https://github.com/datajuicer/data-juicer","last_synced_at":"2026-04-03T06:04:33.093Z","repository":{"id":185316735,"uuid":"673277958","full_name":"datajuicer/data-juicer","owner":"datajuicer","description":"Data processing for and with foundation models!  🍎 🍋 🌽 ➡️ ➡️🍸 🍹 🍷","archived":false,"fork":false,"pushed_at":"2026-03-24T03:35:35.000Z","size":1022647,"stargazers_count":6114,"open_issues_count":56,"forks_count":347,"subscribers_count":20,"default_branch":"main","last_synced_at":"2026-03-24T04:41:17.724Z","etag":null,"topics":["data","data-analysis","data-pipeline","data-processing","data-science","data-visualization","foundation-models","instruction-tuning","large-language-models","llm","llms","multi-modal","pre-training","synthetic-data"],"latest_commit_sha":null,"homepage":"https://datajuicer.github.io/data-juicer/","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/datajuicer.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null}},"created_at":"2023-08-01T09:16:41.000Z","updated_at":"2026-03-24T03:16:25.000Z","dependencies_parsed_at":null,"dependency_job_id":"b3dfe45b-7817-4c78-b9b4-06c1e0fd1372","html_url":"https://github.com/datajuicer/data-juicer","commit_stats":{"total_commits":223,"total_committers":28,"mean_commits":7.964285714285714,"dds":0.7713004484304933,"last_synced_commit":"8e9b4c0b21d099fb950e74e6e176a5f730dd39eb"},"previous_names":["alibaba/data-juicer","modelscope/data-juicer","datajuicer/data-juicer"],"tags_count":27,"template":false,"template_full_name":null,"purl":"pkg:github/datajuicer/data-juicer","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/datajuicer","download_url":"https://codeload.github.com/datajuicer/data-juicer/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":31172821,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-03-29T21:28:10.185Z","status":"online","status_checked_at":"2026-03-30T02:00:06.831Z","response_time":138,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"datajuicer","name":"DataJuicer","uuid":"223222708","kind":"organization","description":"Data processing for and with large models.","email":"datajuicer@outlook.com","website":null,"location":null,"twitter":null,"company":null,"icon_url":"https://avatars.githubusercontent.com/u/223222708?v=4","repositories_count":1,"last_synced_at":"2025-11-05T09:13:59.618Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/datajuicer","funding_links":[],"total_stars":0,"followers":3,"following":0,"created_at":"2025-11-05T09:13:59.639Z","updated_at":"2025-11-05T09:13:59.639Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/datajuicer","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/datajuicer/repositories"},"packages":[{"id":8279458,"name":"py-data-juicer","ecosystem":"pypi","description":"Data Processing for and with Foundation Models.","homepage":null,"licenses":"Apache-2.0","normalized_licenses":["Apache-2.0"],"repository_url":"https://github.com/datajuicer/data-juicer","keywords_array":[],"namespace":null,"versions_count":26,"first_release_published_at":"2023-09-15T02:19:28.000Z","latest_release_published_at":"2026-02-26T05:08:49.000Z","latest_release_number":"1.5.0","last_synced_at":"2026-03-16T22:28:11.400Z","created_at":"2023-09-15T02:32:14.860Z","updated_at":"2026-03-16T22:28:11.400Z","registry_url":"https://pypi.org/project/py-data-juicer/","install_command":"pip install py-data-juicer --index-url https://pypi.org/simple","documentation_url":"https://py-data-juicer.readthedocs.io/","metadata":{"funding":null,"documentation":null,"classifiers":["License :: OSI Approved :: Apache Software License","Operating System :: OS Independent","Programming Language :: Python :: 3"],"normalized_name":"py-data-juicer","project_status":null},"repo_metadata":{"id":185316735,"uuid":"673277958","full_name":"modelscope/data-juicer","owner":"modelscope","description":"A one-stop data processing system to make data higher-quality, juicier, and more digestible for LLMs!  🍎 🍋 🌽 ➡️ ➡️🍸 🍹 🍷为大语言模型提供更高质量、更丰富、更易”消化“的数据！","archived":false,"fork":false,"pushed_at":"2024-03-21T02:15:39.000Z","size":34272,"stargazers_count":1321,"open_issues_count":12,"forks_count":78,"subscribers_count":13,"default_branch":"main","last_synced_at":"2024-03-21T03:27:04.766Z","etag":null,"topics":["chinese","data-analysis","data-science","data-visualization","dataset","gpt","gpt-4","instruction-tuning","large-language-models","llama","llava","llm","llms","multi-modal","nlp","opendata","pre-training","pytorch","sora","streamlit"],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/modelscope.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null}},"created_at":"2023-08-01T09:16:41.000Z","updated_at":"2024-03-21T03:27:14.284Z","dependencies_parsed_at":null,"dependency_job_id":"b3dfe45b-7817-4c78-b9b4-06c1e0fd1372","html_url":"https://github.com/modelscope/data-juicer","commit_stats":{"total_commits":63,"total_committers":13,"mean_commits":4.846153846153846,"dds":0.6349206349206349,"last_synced_commit":"f2999866361fd181a21bf580c3e31a0689f74941"},"previous_names":["alibaba/data-juicer","modelscope/data-juicer"],"tags_count":4,"template":false,"template_full_name":null,"repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/modelscope","download_url":"https://codeload.github.com/modelscope/data-juicer/tar.gz/refs/heads/main","host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":214662657,"owners_count":15766250,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"},"owner_record":{"login":"modelscope","name":"ModelScope","uuid":"109945100","kind":"organization","description":"Model-as-a-Service in the making: bring accessible AI to all.","email":"contact@modelscope.cn","website":"https://www.modelscope.cn/","location":null,"twitter":null,"company":null,"icon_url":"https://avatars.githubusercontent.com/u/109945100?v=4","repositories_count":3,"last_synced_at":"2023-04-03T10:08:13.111Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/modelscope","funding_links":[],"total_stars":null,"followers":null,"following":null,"created_at":"2023-04-03T10:08:13.157Z","updated_at":"2023-04-03T10:08:13.157Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/modelscope","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/modelscope/repositories"},"tags":[{"name":"v0.2.0","sha":"156ed20acdac7c3fe02911bc9d98ecebd3ec5fb0","kind":"commit","published_at":"2024-03-07T12:23:25.000Z","download_url":"https://codeload.github.com/modelscope/data-juicer/tar.gz/v0.2.0","html_url":"https://github.com/modelscope/data-juicer/releases/tag/v0.2.0","dependencies_parsed_at":null,"dependency_job_id":null,"tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.2.0","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.2.0/manifests"},{"name":"v0.1.3","sha":"a3c8310bf0848e787fba5eca2373e69a25767fd7","kind":"commit","published_at":"2024-01-05T07:19:44.000Z","download_url":"https://codeload.github.com/modelscope/data-juicer/tar.gz/v0.1.3","html_url":"https://github.com/modelscope/data-juicer/releases/tag/v0.1.3","dependencies_parsed_at":null,"dependency_job_id":null,"tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.1.3","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.1.3/manifests"},{"name":"v0.1.2","sha":"5bd715d074d4b385a9eee9a1a670683e49362658","kind":"commit","published_at":"2023-09-28T04:05:19.000Z","download_url":"https://codeload.github.com/modelscope/data-juicer/tar.gz/v0.1.2","html_url":"https://github.com/modelscope/data-juicer/releases/tag/v0.1.2","dependencies_parsed_at":null,"dependency_job_id":null,"tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.1.2","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.1.2/manifests"},{"name":"v0.1.0","sha":"d4ab729b89d440d10e531e16302276504a074608","kind":"commit","published_at":"2023-08-10T11:19:05.000Z","download_url":"https://codeload.github.com/modelscope/data-juicer/tar.gz/v0.1.0","html_url":"https://github.com/modelscope/data-juicer/releases/tag/v0.1.0","dependencies_parsed_at":null,"dependency_job_id":null,"tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.1.0","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/tags/v0.1.0/manifests"}]},"repo_metadata_updated_at":"2024-08-09T12:23:35.259Z","dependent_packages_count":0,"downloads":1544,"downloads_period":"last-month","dependent_repos_count":0,"rankings":{"downloads":16.886596715869345,"dependent_repos_count":68.90986571625571,"dependent_packages_count":7.381897931361508,"stargazers_count":7.066541520525614,"forks_count":11.988384955752213,"docker_downloads_count":null,"average":22.446657367952877},"purl":"pkg:pypi/py-data-juicer","advisories":[],"docker_usage_url":"https://docker.ecosyste.ms/usage/pypi/py-data-juicer","docker_dependents_count":null,"docker_downloads_count":null,"usage_url":"https://repos.ecosyste.ms/usage/pypi/py-data-juicer","dependent_repositories_url":"https://repos.ecosyste.ms/api/v1/usage/pypi/py-data-juicer/dependencies","status":null,"funding_links":[],"critical":null,"issue_metadata":{"last_synced_at":"2024-07-25T15:36:29.864Z","issues_count":43,"pull_requests_count":57,"avg_time_to_close_issue":863284.023255814,"avg_time_to_close_pull_request":112377.33333333333,"issues_closed_count":43,"pull_requests_closed_count":57,"pull_request_authors_count":9,"issue_authors_count":19,"avg_comments_per_issue":2.744186046511628,"avg_comments_per_pull_request":0.2982456140350877,"merged_pull_requests_count":56,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":43,"past_year_pull_requests_count":57,"past_year_avg_time_to_close_issue":863284.023255814,"past_year_avg_time_to_close_pull_request":112377.33333333333,"past_year_issues_closed_count":43,"past_year_pull_requests_closed_count":57,"past_year_pull_request_authors_count":9,"past_year_issue_authors_count":19,"past_year_avg_comments_per_issue":2.744186046511628,"past_year_avg_comments_per_pull_request":0.2982456140350877,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":56,"issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/modelscope%2Fdata-juicer/issues","maintainers":[{"login":"HYLcool","count":39,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/HYLcool"},{"login":"zhijianma","count":12,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/zhijianma"},{"login":"yxdyc","count":11,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/yxdyc"},{"login":"chenhesen","count":6,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/chenhesen"},{"login":"pan-x-c","count":2,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/pan-x-c"},{"login":"xieyxclack","count":2,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/xieyxclack"},{"login":"drcege","count":1,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/drcege"}],"active_maintainers":[{"login":"HYLcool","count":36,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/HYLcool"},{"login":"zhijianma","count":11,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/zhijianma"},{"login":"yxdyc","count":9,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/yxdyc"},{"login":"chenhesen","count":5,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/chenhesen"},{"login":"pan-x-c","count":1,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/pan-x-c"}]},"versions_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/py-data-juicer/versions","version_numbers_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/py-data-juicer/version_numbers","dependent_packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/py-data-juicer/dependent_packages","related_packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/py-data-juicer/related_packages","codemeta_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/py-data-juicer/codemeta","maintainers":[{"uuid":"data-juicer","login":"data-juicer","name":null,"email":null,"url":null,"packages_count":4,"html_url":"https://pypi.org/user/data-juicer/","role":null,"created_at":"2023-09-18T19:18:12.665Z","updated_at":"2023-09-18T19:18:12.665Z","packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/maintainers/data-juicer/packages"}],"registry":{"name":"pypi.org","url":"https://pypi.org","ecosystem":"pypi","default":true,"packages_count":824599,"maintainers_count":351804,"namespaces_count":0,"keywords_count":0,"github":"pypi","metadata":{"funded_packages_count":52857},"icon_url":"https://github.com/pypi.png","created_at":"2022-04-04T15:19:23.364Z","updated_at":"2026-03-18T07:56:03.051Z","packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages","maintainers_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/maintainers","namespaces_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/namespaces"}}],"commits":{"id":11577122,"full_name":"datajuicer/data-juicer","default_branch":"master","total_commits":528,"total_committers":40,"total_bot_commits":0,"total_bot_committers":0,"mean_commits":13.2,"dds":0.7102272727272727,"past_year_total_commits":223,"past_year_total_committers":25,"past_year_total_bot_commits":0,"past_year_total_bot_committers":0,"past_year_mean_commits":8.92,"past_year_dds":0.6771300448430493,"last_synced_at":"2026-02-15T03:57:17.960Z","last_synced_commit":"ae290f787b9aff31862ba89455c547530c86b446","created_at":"2025-11-24T10:33:12.857Z","updated_at":"2026-02-15T03:55:16.068Z","committers":[{"name":"Yilun Huang","email":"lielin.hyl@alibaba-inc.com","login":"HYLcool","count":153},{"name":"Daoyuan Chen","email":"67475544+yxdyc","login":"yxdyc","count":49},{"name":"BeachWang","email":"1400012807@pku.edu.cn","login":"BeachWang","count":48},{"name":"Cathy0908","email":"30484308+Cathy0908","login":"Cathy0908","count":41},{"name":"Ce Ge (戈策)","email":"gece@foxmail.com","login":"drcege","count":35},{"name":"cmgzn","email":"85746275+cmgzn","login":"cmgzn","count":30},{"name":"zhijianma","email":"zhijian.mzj@alibaba-inc.com","login":"zhijianma","count":30},{"name":"garyzhang99","email":"46197280+garyzhang99","login":"garyzhang99","count":17},{"name":"Cyrus Zhang","email":"cyrus.ylzhang@gmail.com","login":"cyruszhang","count":13},{"name":"chenhesen","email":"hesen.chs@alibaba-inc.com","login":"chenhesen","count":12},{"name":"Xuchen Pan","email":"32844285+pan-x-c","login":"pan-x-c","count":12},{"name":"Qirui-jiao","email":"156628817+Qirui-jiao","login":"Qirui-jiao","count":12},{"name":"Yuhan Liu","email":"30294295+liuyuhanalex","login":"liuyuhanalex","count":10},{"name":"co63oc","email":"co63oc","login":"co63oc","count":10},{"name":"Zhen Qin","email":"zhenqincn@gmail.com","login":"zhenqincn","count":7},{"name":"Xinyu Zhang","email":"60529799+xyuzh","login":"xyuzh","count":6},{"name":"chenyushuo","email":"297086016@qq.com","login":"chenyushuo","count":5},{"name":"Du Bin","email":"dubin555@gmail.com","login":"dubin555","count":4},{"name":"kyotom","email":"37698246+kyo-tom","login":"kyo-tom","count":4},{"name":"John Giorgi","email":"johnmgiorgi@gmail.com","login":"JohnGiorgi","count":3},{"name":"lingzhq","email":"145309613+lingzhq","login":"lingzhq","count":3},{"name":"2108038773","email":"101000927+2108038773","login":"2108038773","count":2},{"name":"JamieYu","email":"yu_haojia@foxmail.com","login":"TobyJasper","count":2},{"name":"ShenQianli","email":"shenqianli@u.nus.edu","login":"ShenQianli","count":2},{"name":"Yuexiang XIE","email":"yuexiang.xyx@alibaba-inc.com","login":"xieyxclack","count":2},{"name":"Shurui Kou","email":"1115059192@qq.com","login":"Dludora","count":2},{"name":"weijie","email":"34210233+shiweijiezero","login":"shiweijiezero","count":1},{"name":"simplaj","email":"39286060+simplaj","login":"simplaj","count":1},{"name":"seanzhang-zhichen","email":"74812416+seanzhang-zhichen","login":"seanzhang-zhichen","count":1},{"name":"ricksun2023","email":"128897743+ricksun2023","login":"ricksun2023","count":1},{"name":"panghu","email":"51791120+fanronghai","login":"fanronghai","count":1},{"name":"jackylee","email":"qcsd2011@gmail.com","login":"jackylee-ch","count":1},{"name":"Yanyi Liu","email":"wolfsonliu@163.com","login":"liuyanyi","count":1},{"name":"XinyuLiu1999","email":"77971825+XinyuLiu1999","login":"XinyuLiu1999","count":1},{"name":"Ruidong-X","email":"xuruidong@gmail.com","login":"xuruidong","count":1},{"name":"NuODaniel","email":"zhonghanjun@baidu.com","login":"danielhjz","count":1},{"name":"JONGHO LEE","email":"ljhljh0125@gmail.com","login":"JONGSKY","count":1},{"name":"HunterLine","email":"153903700@qq.com","login":"HunterLine","count":1},{"name":"HongCheng","email":"kwchenghong@gmail.com","login":"chg0901","count":1},{"name":"Alibaba OSS","email":"opensource@alibaba-inc.com","login":"alibaba-oss","count":1}],"past_year_committers":[{"name":"Yilun Huang","email":"lielin.hyl@alibaba-inc.com","login":"HYLcool","count":72},{"name":"cmgzn","email":"85746275+cmgzn","login":"cmgzn","count":30},{"name":"Cathy0908","email":"30484308+Cathy0908","login":"Cathy0908","count":29},{"name":"Cyrus Zhang","email":"cyrus.ylzhang@gmail.com","login":"cyruszhang","count":13},{"name":"Daoyuan Chen","email":"67475544+yxdyc","login":"yxdyc","count":13},{"name":"Qirui-jiao","email":"156628817+Qirui-jiao","login":"Qirui-jiao","count":10},{"name":"Yuhan Liu","email":"30294295+liuyuhanalex","login":"liuyuhanalex","count":9},{"name":"Haibin Wang","email":"1400012807@pku.edu.cn","login":"BeachWang","count":8},{"name":"Xinyu Zhang","email":"60529799+xyuzh","login":"xyuzh","count":6},{"name":"co63oc","email":"co63oc","login":"co63oc","count":6},{"name":"Du Bin","email":"dubin555@gmail.com","login":"dubin555","count":4},{"name":"kyotom","email":"37698246+kyo-tom","login":"kyo-tom","count":4},{"name":"John Giorgi","email":"johnmgiorgi@gmail.com","login":"JohnGiorgi","count":3},{"name":"ShenQianli","email":"shenqianli@u.nus.edu","login":"ShenQianli","count":2},{"name":"Zhen Qin","email":"zhenqincn@gmail.com","login":"zhenqincn","count":2},{"name":"lingzhq","email":"145309613+lingzhq","login":"lingzhq","count":2},{"name":"Shurui Kou","email":"1115059192@qq.com","login":"Dludora","count":2},{"name":"HunterLine","email":"153903700@qq.com","login":"HunterLine","count":1},{"name":"NuODaniel","email":"zhonghanjun@baidu.com","login":"danielhjz","count":1},{"name":"XinyuLiu1999","email":"77971825+XinyuLiu1999","login":"XinyuLiu1999","count":1},{"name":"Xuchen Pan","email":"32844285+pan-x-c","login":"pan-x-c","count":1},{"name":"chenyushuo","email":"297086016@qq.com","login":"chenyushuo","count":1},{"name":"garyzhang99","email":"46197280+garyzhang99","login":"garyzhang99","count":1},{"name":"panghu","email":"51791120+fanronghai","login":"fanronghai","count":1},{"name":"ricksun2023","email":"128897743+ricksun2023","login":"ricksun2023","count":1}],"commits_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer/commits","host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2026-03-30T00:00:08.107Z","repositories_count":6205572,"commits_count":928216359,"contributors_count":35823850,"owners_count":1143772,"icon_url":"https://github.com/github.png","host_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories"}},"issues":{"table":{"full_name":"datajuicer/data-juicer","html_url":"https://github.com/datajuicer/data-juicer","last_synced_at":"2025-12-31T06:09:01.426Z","status":null,"issues_count":0,"pull_requests_count":0,"avg_time_to_close_issue":null,"avg_time_to_close_pull_request":null,"issues_closed_count":0,"pull_requests_closed_count":0,"pull_request_authors_count":0,"issue_authors_count":0,"avg_comments_per_issue":null,"avg_comments_per_pull_request":null,"merged_pull_requests_count":0,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":0,"past_year_pull_requests_count":0,"past_year_avg_time_to_close_issue":null,"past_year_avg_time_to_close_pull_request":null,"past_year_issues_closed_count":0,"past_year_pull_requests_closed_count":0,"past_year_pull_request_authors_count":0,"past_year_issue_authors_count":0,"past_year_avg_comments_per_issue":null,"past_year_avg_comments_per_pull_request":null,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":0,"created_at":"2025-11-05T12:00:09.552Z","updated_at":"2025-12-31T06:09:01.427Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/datajuicer%2Fdata-juicer/issues","issue_labels_count":{"table":{}},"pull_request_labels_count":{"table":{}},"issue_author_associations_count":{"table":{}},"pull_request_author_associations_count":{"table":{}},"issue_authors":{"table":{}},"pull_request_authors":{"table":{}},"host":{"table":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2026-01-01T00:00:08.101Z","repositories_count":12545357,"issues_count":35617695,"pull_requests_count":117034435,"authors_count":11079382,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"}},"past_year_issue_labels_count":{"table":{}},"past_year_pull_request_labels_count":{"table":{}},"past_year_issue_author_associations_count":{"table":{}},"past_year_pull_request_author_associations_count":{"table":{}},"past_year_issue_authors":{"table":{}},"past_year_pull_request_authors":{"table":{}},"maintainers":[],"active_maintainers":[]}},"events":{"total":{"CreateEvent":10,"IssuesEvent":17,"WatchEvent":78,"DeleteEvent":8,"MemberEvent":1,"IssueCommentEvent":30,"PushEvent":111,"PullRequestReviewCommentEvent":65,"PullRequestReviewEvent":41,"PullRequestEvent":38,"ForkEvent":6},"last_year":{"CreateEvent":10,"IssuesEvent":17,"WatchEvent":78,"DeleteEvent":8,"MemberEvent":1,"IssueCommentEvent":30,"PushEvent":111,"PullRequestReviewCommentEvent":65,"PullRequestReviewEvent":41,"PullRequestEvent":38,"ForkEvent":6}},"keywords":["data","data-analysis","data-pipeline","data-processing","data-science","data-visualization","foundation-models","instruction-tuning","large-language-models","llm","llms","multi-modal","pre-training","synthetic-data"],"dependencies":[],"score":19.759112760345214,"created_at":"2024-07-25T15:26:12.203Z","updated_at":"2026-04-03T06:04:33.094Z","avatar_url":"https://github.com/datajuicer.png","language":"Python","codemeta":null,"publiccode":null,"project_url":"https://summary.ecosyste.ms/api/v1/projects/124619","html_url":"https://summary.ecosyste.ms/projects/124619"}