{"id":125457,"url":"https://github.com/libraryofcelsus/LLM_File_Parser","last_synced_at":"2026-06-19T15:30:18.986Z","repository":{"id":248939966,"uuid":"827924472","full_name":"libraryofcelsus/LLM_File_Parser","owner":"libraryofcelsus","description":"AutoML/Unstructured Data Processing for RAG and LLM Dataset Creation.  Current Database Options are: Qdrant or Marqo DB.","archived":false,"fork":false,"pushed_at":"2024-07-17T22:31:24.000Z","size":44,"stargazers_count":6,"open_issues_count":0,"forks_count":1,"subscribers_count":1,"default_branch":"main","last_synced_at":"2026-05-11T21:03:13.628Z","etag":null,"topics":["ai","automl","dataset-generation","llama3","llm","marqo","qdrant","rag","unstructured-data","vector-database"],"latest_commit_sha":null,"homepage":"","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"other","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/libraryofcelsus.png","metadata":{"files":{"readme":"readme.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE.lic","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null}},"created_at":"2024-07-12T17:16:19.000Z","updated_at":"2025-02-20T14:22:53.000Z","dependencies_parsed_at":"2024-07-18T02:27:34.158Z","dependency_job_id":"bef99620-d225-43a4-8d6e-615dede82137","html_url":"https://github.com/libraryofcelsus/LLM_File_Parser","commit_stats":null,"previous_names":["libraryofcelsus/llm_file_parser"],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/libraryofcelsus/LLM_File_Parser","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/libraryofcelsus","download_url":"https://codeload.github.com/libraryofcelsus/LLM_File_Parser/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":286080680,"owners_count":33082686,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2026-05-15T20:25:35.270Z","status":"ssl_error","status_checked_at":"2026-05-15T20:25:34.732Z","response_time":103,"last_error":"SSL_connect returned=1 errno=0 peeraddr=140.82.121.5:443 state=error: unexpected eof while reading","robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":false,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"libraryofcelsus","name":"Library of Celsus","uuid":"127127527","kind":"user","description":"","email":"","website":null,"location":null,"twitter":null,"company":null,"icon_url":"https://avatars.githubusercontent.com/u/127127527?u=ee18a21f046484871660fa9084a2da3499edc870\u0026v=4","repositories_count":1,"last_synced_at":"2023-04-12T22:40:23.461Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/libraryofcelsus","funding_links":[],"total_stars":null,"followers":null,"following":null,"created_at":"2023-04-12T22:40:23.472Z","updated_at":"2023-04-12T22:40:23.472Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/libraryofcelsus","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/libraryofcelsus/repositories"},"packages":[],"commits":{"id":1640916,"full_name":"libraryofcelsus/LLM_File_Parser","default_branch":"main","total_commits":2,"total_committers":1,"total_bot_commits":0,"total_bot_committers":0,"mean_commits":2.0,"dds":0.0,"past_year_total_commits":0,"past_year_total_committers":0,"past_year_total_bot_commits":0,"past_year_total_bot_committers":0,"past_year_mean_commits":0.0,"past_year_dds":0.0,"last_synced_at":"2026-06-16T14:01:00.378Z","last_synced_commit":"49b2f8bfc7432df332606b93fae9c66d79dcdc52","created_at":"2024-07-25T15:44:54.233Z","updated_at":"2026-06-16T14:00:42.809Z","committers":[{"name":"Library of Celsus","email":"127127527+libraryofcelsus","login":"libraryofcelsus","count":2}],"past_year_committers":[],"commits_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser/commits","host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2026-06-18T00:00:09.052Z","repositories_count":6262379,"commits_count":876174777,"contributors_count":35070065,"owners_count":1168070,"icon_url":"https://github.com/github.png","host_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories"}},"issues":{"table":{"full_name":"libraryofcelsus/LLM_File_Parser","html_url":"https://github.com/libraryofcelsus/LLM_File_Parser","last_synced_at":"2026-05-22T12:14:42.842Z","status":"error","issues_count":0,"pull_requests_count":0,"avg_time_to_close_issue":null,"avg_time_to_close_pull_request":null,"issues_closed_count":0,"pull_requests_closed_count":0,"pull_request_authors_count":0,"issue_authors_count":0,"avg_comments_per_issue":null,"avg_comments_per_pull_request":null,"merged_pull_requests_count":0,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":0,"past_year_pull_requests_count":0,"past_year_avg_time_to_close_issue":null,"past_year_avg_time_to_close_pull_request":null,"past_year_issues_closed_count":0,"past_year_pull_requests_closed_count":0,"past_year_pull_request_authors_count":0,"past_year_issue_authors_count":0,"past_year_avg_comments_per_issue":null,"past_year_avg_comments_per_pull_request":null,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":0,"created_at":"2024-07-25T15:44:59.522Z","updated_at":"2026-05-22T12:14:42.842Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/libraryofcelsus%2FLLM_File_Parser/issues","issue_labels_count":{"table":{}},"pull_request_labels_count":{"table":{}},"issue_author_associations_count":{"table":{}},"pull_request_author_associations_count":{"table":{}},"issue_authors":{"table":{}},"pull_request_authors":{"table":{}},"host":{"table":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2026-06-18T00:00:09.580Z","repositories_count":14815588,"issues_count":33105313,"pull_requests_count":109259805,"authors_count":11310559,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"}},"past_year_issue_labels_count":{"table":{}},"past_year_pull_request_labels_count":{"table":{}},"past_year_issue_author_associations_count":{"table":{}},"past_year_pull_request_author_associations_count":{"table":{}},"past_year_issue_authors":{"table":{}},"past_year_pull_request_authors":{"table":{}},"maintainers":[],"active_maintainers":[]}},"events":{"total":{"WatchEvent":3},"last_year":{}},"keywords":["ai","automl","dataset-generation","llama3","llm","marqo","qdrant","rag","unstructured-data","vector-database"],"dependencies":[{"ecosystem":"pypi","filepath":"requirements.txt","sha":null,"kind":"manifest","created_at":"2024-07-18T02:27:33.049Z","updated_at":"2024-07-18T02:27:33.049Z","repository_link":"https://github.com/libraryofcelsus/LLM_File_Parser/blob/main/requirements.txt","dependencies":[{"id":19171424044,"package_name":"aiosignal","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424045,"package_name":"aiofiles","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424046,"package_name":"aiohttp","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424047,"package_name":"openai","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424048,"package_name":"numpy","ecosystem":"pypi","requirements":"\u003c2.0.0","direct":true,"kind":"runtime","optional":false},{"id":19171424049,"package_name":"marqo","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424050,"package_name":"keyboard","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424051,"package_name":"pytesseract","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424052,"package_name":"ebooklib","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424053,"package_name":"beautifulsoup4","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424054,"package_name":"kivy","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424055,"package_name":"requests","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424056,"package_name":"pdf2image","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424057,"package_name":"qdrant-client","ecosystem":"pypi","requirements":"==1.9.1","direct":true,"kind":"runtime","optional":false},{"id":19171424058,"package_name":"sentence-transformers","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424059,"package_name":"pdfminer.six","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424060,"package_name":"opencv-python","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":19171424061,"package_name":"openai-whisper","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false}]}],"score":1.791759469228055,"created_at":"2024-07-25T15:26:43.163Z","updated_at":"2026-06-19T15:30:18.986Z","avatar_url":"https://github.com/libraryofcelsus.png","language":"Python","codemeta":null,"publiccode":null,"project_url":"https://summary.ecosyste.ms/api/v1/projects/125457","html_url":"https://summary.ecosyste.ms/projects/125457"}